sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
  13if t.TYPE_CHECKING:
  14    from sqlglot._typing import E
  15
  16logger = logging.getLogger("sqlglot")
  17
  18
  19def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
  20    if len(args) == 1 and args[0].is_star:
  21        return exp.StarMap(this=args[0])
  22
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34def parse_like(args: t.List) -> exp.Expression:
  35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  37
  38
  39def binary_range_parser(
  40    expr_type: t.Type[exp.Expression],
  41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  42    return lambda self, this: self._parse_escape(
  43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  44    )
  45
  46
  47class _Parser(type):
  48    def __new__(cls, clsname, bases, attrs):
  49        klass = super().__new__(cls, clsname, bases, attrs)
  50        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  51        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  52
  53        return klass
  54
  55
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        unnest_column_only: stored on the parser and presumably controls how UNNEST
            aliases are parsed — confirm against dialect usage.
            Default: False
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
 182    SUBQUERY_PREDICATES = {
 183        TokenType.ANY: exp.Any,
 184        TokenType.ALL: exp.All,
 185        TokenType.EXISTS: exp.Exists,
 186        TokenType.SOME: exp.Any,
 187    }
 188
 189    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 190
 191    DB_CREATABLES = {
 192        TokenType.DATABASE,
 193        TokenType.SCHEMA,
 194        TokenType.TABLE,
 195        TokenType.VIEW,
 196    }
 197
 198    CREATABLES = {
 199        TokenType.COLUMN,
 200        TokenType.FUNCTION,
 201        TokenType.INDEX,
 202        TokenType.PROCEDURE,
 203        *DB_CREATABLES,
 204    }
 205
 206    ID_VAR_TOKENS = {
 207        TokenType.VAR,
 208        TokenType.ANTI,
 209        TokenType.APPLY,
 210        TokenType.ASC,
 211        TokenType.AUTO_INCREMENT,
 212        TokenType.BEGIN,
 213        TokenType.CACHE,
 214        TokenType.COLLATE,
 215        TokenType.COMMAND,
 216        TokenType.COMMENT,
 217        TokenType.COMMIT,
 218        TokenType.CONSTRAINT,
 219        TokenType.DEFAULT,
 220        TokenType.DELETE,
 221        TokenType.DESC,
 222        TokenType.DESCRIBE,
 223        TokenType.DIV,
 224        TokenType.END,
 225        TokenType.EXECUTE,
 226        TokenType.ESCAPE,
 227        TokenType.FALSE,
 228        TokenType.FIRST,
 229        TokenType.FILTER,
 230        TokenType.FORMAT,
 231        TokenType.FULL,
 232        TokenType.IF,
 233        TokenType.IS,
 234        TokenType.ISNULL,
 235        TokenType.INTERVAL,
 236        TokenType.KEEP,
 237        TokenType.LEFT,
 238        TokenType.LOAD,
 239        TokenType.MERGE,
 240        TokenType.NATURAL,
 241        TokenType.NEXT,
 242        TokenType.OFFSET,
 243        TokenType.ORDINALITY,
 244        TokenType.OVERWRITE,
 245        TokenType.PARTITION,
 246        TokenType.PERCENT,
 247        TokenType.PIVOT,
 248        TokenType.PRAGMA,
 249        TokenType.RANGE,
 250        TokenType.REFERENCES,
 251        TokenType.RIGHT,
 252        TokenType.ROW,
 253        TokenType.ROWS,
 254        TokenType.SEMI,
 255        TokenType.SET,
 256        TokenType.SETTINGS,
 257        TokenType.SHOW,
 258        TokenType.TEMPORARY,
 259        TokenType.TOP,
 260        TokenType.TRUE,
 261        TokenType.UNIQUE,
 262        TokenType.UNPIVOT,
 263        TokenType.VOLATILE,
 264        TokenType.WINDOW,
 265        *CREATABLES,
 266        *SUBQUERY_PREDICATES,
 267        *TYPE_TOKENS,
 268        *NO_PAREN_FUNCTIONS,
 269    }
 270
 271    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 272
 273    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 274        TokenType.APPLY,
 275        TokenType.FULL,
 276        TokenType.LEFT,
 277        TokenType.LOCK,
 278        TokenType.NATURAL,
 279        TokenType.OFFSET,
 280        TokenType.RIGHT,
 281        TokenType.WINDOW,
 282    }
 283
 284    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 285
 286    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 287
 288    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 289
 290    FUNC_TOKENS = {
 291        TokenType.COMMAND,
 292        TokenType.CURRENT_DATE,
 293        TokenType.CURRENT_DATETIME,
 294        TokenType.CURRENT_TIMESTAMP,
 295        TokenType.CURRENT_TIME,
 296        TokenType.CURRENT_USER,
 297        TokenType.FILTER,
 298        TokenType.FIRST,
 299        TokenType.FORMAT,
 300        TokenType.GLOB,
 301        TokenType.IDENTIFIER,
 302        TokenType.INDEX,
 303        TokenType.ISNULL,
 304        TokenType.ILIKE,
 305        TokenType.LIKE,
 306        TokenType.MERGE,
 307        TokenType.OFFSET,
 308        TokenType.PRIMARY_KEY,
 309        TokenType.RANGE,
 310        TokenType.REPLACE,
 311        TokenType.ROW,
 312        TokenType.UNNEST,
 313        TokenType.VAR,
 314        TokenType.LEFT,
 315        TokenType.RIGHT,
 316        TokenType.DATE,
 317        TokenType.DATETIME,
 318        TokenType.TABLE,
 319        TokenType.TIMESTAMP,
 320        TokenType.TIMESTAMPTZ,
 321        TokenType.WINDOW,
 322        *TYPE_TOKENS,
 323        *SUBQUERY_PREDICATES,
 324    }
 325
 326    CONJUNCTION = {
 327        TokenType.AND: exp.And,
 328        TokenType.OR: exp.Or,
 329    }
 330
 331    EQUALITY = {
 332        TokenType.EQ: exp.EQ,
 333        TokenType.NEQ: exp.NEQ,
 334        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 335    }
 336
 337    COMPARISON = {
 338        TokenType.GT: exp.GT,
 339        TokenType.GTE: exp.GTE,
 340        TokenType.LT: exp.LT,
 341        TokenType.LTE: exp.LTE,
 342    }
 343
 344    BITWISE = {
 345        TokenType.AMP: exp.BitwiseAnd,
 346        TokenType.CARET: exp.BitwiseXor,
 347        TokenType.PIPE: exp.BitwiseOr,
 348        TokenType.DPIPE: exp.DPipe,
 349    }
 350
 351    TERM = {
 352        TokenType.DASH: exp.Sub,
 353        TokenType.PLUS: exp.Add,
 354        TokenType.MOD: exp.Mod,
 355        TokenType.COLLATE: exp.Collate,
 356    }
 357
 358    FACTOR = {
 359        TokenType.DIV: exp.IntDiv,
 360        TokenType.LR_ARROW: exp.Distance,
 361        TokenType.SLASH: exp.Div,
 362        TokenType.STAR: exp.Mul,
 363    }
 364
 365    TIMESTAMPS = {
 366        TokenType.TIME,
 367        TokenType.TIMESTAMP,
 368        TokenType.TIMESTAMPTZ,
 369        TokenType.TIMESTAMPLTZ,
 370    }
 371
 372    SET_OPERATIONS = {
 373        TokenType.UNION,
 374        TokenType.INTERSECT,
 375        TokenType.EXCEPT,
 376    }
 377
 378    JOIN_SIDES = {
 379        TokenType.LEFT,
 380        TokenType.RIGHT,
 381        TokenType.FULL,
 382    }
 383
 384    JOIN_KINDS = {
 385        TokenType.INNER,
 386        TokenType.OUTER,
 387        TokenType.CROSS,
 388        TokenType.SEMI,
 389        TokenType.ANTI,
 390    }
 391
 392    LAMBDAS = {
 393        TokenType.ARROW: lambda self, expressions: self.expression(
 394            exp.Lambda,
 395            this=self._replace_lambda(
 396                self._parse_conjunction(),
 397                {node.name for node in expressions},
 398            ),
 399            expressions=expressions,
 400        ),
 401        TokenType.FARROW: lambda self, expressions: self.expression(
 402            exp.Kwarg,
 403            this=exp.Var(this=expressions[0].name),
 404            expression=self._parse_conjunction(),
 405        ),
 406    }
 407
 408    COLUMN_OPERATORS = {
 409        TokenType.DOT: None,
 410        TokenType.DCOLON: lambda self, this, to: self.expression(
 411            exp.Cast if self.STRICT_CAST else exp.TryCast,
 412            this=this,
 413            to=to,
 414        ),
 415        TokenType.ARROW: lambda self, this, path: self.expression(
 416            exp.JSONExtract,
 417            this=this,
 418            expression=path,
 419        ),
 420        TokenType.DARROW: lambda self, this, path: self.expression(
 421            exp.JSONExtractScalar,
 422            this=this,
 423            expression=path,
 424        ),
 425        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 426            exp.JSONBExtract,
 427            this=this,
 428            expression=path,
 429        ),
 430        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 431            exp.JSONBExtractScalar,
 432            this=this,
 433            expression=path,
 434        ),
 435        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 436            exp.JSONBContains,
 437            this=this,
 438            expression=key,
 439        ),
 440    }
 441
 442    EXPRESSION_PARSERS = {
 443        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 444        exp.Column: lambda self: self._parse_column(),
 445        exp.Condition: lambda self: self._parse_conjunction(),
 446        exp.DataType: lambda self: self._parse_types(),
 447        exp.Expression: lambda self: self._parse_statement(),
 448        exp.From: lambda self: self._parse_from(),
 449        exp.Group: lambda self: self._parse_group(),
 450        exp.Having: lambda self: self._parse_having(),
 451        exp.Identifier: lambda self: self._parse_id_var(),
 452        exp.Join: lambda self: self._parse_join(),
 453        exp.Lambda: lambda self: self._parse_lambda(),
 454        exp.Lateral: lambda self: self._parse_lateral(),
 455        exp.Limit: lambda self: self._parse_limit(),
 456        exp.Offset: lambda self: self._parse_offset(),
 457        exp.Order: lambda self: self._parse_order(),
 458        exp.Ordered: lambda self: self._parse_ordered(),
 459        exp.Properties: lambda self: self._parse_properties(),
 460        exp.Qualify: lambda self: self._parse_qualify(),
 461        exp.Returning: lambda self: self._parse_returning(),
 462        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 463        exp.Table: lambda self: self._parse_table_parts(),
 464        exp.TableAlias: lambda self: self._parse_table_alias(),
 465        exp.Where: lambda self: self._parse_where(),
 466        exp.Window: lambda self: self._parse_named_window(),
 467        exp.With: lambda self: self._parse_with(),
 468        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 469    }
 470
 471    STATEMENT_PARSERS = {
 472        TokenType.ALTER: lambda self: self._parse_alter(),
 473        TokenType.BEGIN: lambda self: self._parse_transaction(),
 474        TokenType.CACHE: lambda self: self._parse_cache(),
 475        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 476        TokenType.COMMENT: lambda self: self._parse_comment(),
 477        TokenType.CREATE: lambda self: self._parse_create(),
 478        TokenType.DELETE: lambda self: self._parse_delete(),
 479        TokenType.DESC: lambda self: self._parse_describe(),
 480        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 481        TokenType.DROP: lambda self: self._parse_drop(),
 482        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 483        TokenType.FROM: lambda self: exp.select("*").from_(
 484            t.cast(exp.From, self._parse_from(skip_from_token=True))
 485        ),
 486        TokenType.INSERT: lambda self: self._parse_insert(),
 487        TokenType.LOAD: lambda self: self._parse_load(),
 488        TokenType.MERGE: lambda self: self._parse_merge(),
 489        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 490        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 491        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 492        TokenType.SET: lambda self: self._parse_set(),
 493        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 494        TokenType.UPDATE: lambda self: self._parse_update(),
 495        TokenType.USE: lambda self: self.expression(
 496            exp.Use,
 497            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 498            and exp.Var(this=self._prev.text),
 499            this=self._parse_table(schema=False),
 500        ),
 501    }
 502
 503    UNARY_PARSERS = {
 504        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 505        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 506        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 507        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 508    }
 509
 510    PRIMARY_PARSERS = {
 511        TokenType.STRING: lambda self, token: self.expression(
 512            exp.Literal, this=token.text, is_string=True
 513        ),
 514        TokenType.NUMBER: lambda self, token: self.expression(
 515            exp.Literal, this=token.text, is_string=False
 516        ),
 517        TokenType.STAR: lambda self, _: self.expression(
 518            exp.Star,
 519            **{"except": self._parse_except(), "replace": self._parse_replace()},
 520        ),
 521        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 522        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 523        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 524        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 525        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 526        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 527        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 528        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 529            exp.National, this=token.text
 530        ),
 531        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 532        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 533    }
 534
 535    PLACEHOLDER_PARSERS = {
 536        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 537        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 538        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 539        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 540        else None,
 541    }
 542
 543    RANGE_PARSERS = {
 544        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 545        TokenType.GLOB: binary_range_parser(exp.Glob),
 546        TokenType.ILIKE: binary_range_parser(exp.ILike),
 547        TokenType.IN: lambda self, this: self._parse_in(this),
 548        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 549        TokenType.IS: lambda self, this: self._parse_is(this),
 550        TokenType.LIKE: binary_range_parser(exp.Like),
 551        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 552        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 553        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 554    }
 555
 556    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 557        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 558        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 559        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 560        "CHARACTER SET": lambda self: self._parse_character_set(),
 561        "CHECKSUM": lambda self: self._parse_checksum(),
 562        "CLUSTER": lambda self: self._parse_cluster(),
 563        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 564        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 565        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 566        "DEFINER": lambda self: self._parse_definer(),
 567        "DETERMINISTIC": lambda self: self.expression(
 568            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 569        ),
 570        "DISTKEY": lambda self: self._parse_distkey(),
 571        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 572        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 573        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 574        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 575        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 576        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 577        "FREESPACE": lambda self: self._parse_freespace(),
 578        "IMMUTABLE": lambda self: self.expression(
 579            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 580        ),
 581        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 582        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 583        "LIKE": lambda self: self._parse_create_like(),
 584        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 585        "LOCK": lambda self: self._parse_locking(),
 586        "LOCKING": lambda self: self._parse_locking(),
 587        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 588        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 589        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 590        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 591        "NO": lambda self: self._parse_no_property(),
 592        "ON": lambda self: self._parse_on_property(),
 593        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 594        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 595        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 596        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 597        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 598        "RETURNS": lambda self: self._parse_returns(),
 599        "ROW": lambda self: self._parse_row(),
 600        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 601        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 602        "SETTINGS": lambda self: self.expression(
 603            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 604        ),
 605        "SORTKEY": lambda self: self._parse_sortkey(),
 606        "STABLE": lambda self: self.expression(
 607            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 608        ),
 609        "STORED": lambda self: self._parse_stored(),
 610        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 611        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 612        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 613        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 614        "TTL": lambda self: self._parse_ttl(),
 615        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 616        "VOLATILE": lambda self: self._parse_volatile_property(),
 617        "WITH": lambda self: self._parse_with_property(),
 618    }
 619
 620    CONSTRAINT_PARSERS = {
 621        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 622        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 623        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 624        "CHARACTER SET": lambda self: self.expression(
 625            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 626        ),
 627        "CHECK": lambda self: self.expression(
 628            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 629        ),
 630        "COLLATE": lambda self: self.expression(
 631            exp.CollateColumnConstraint, this=self._parse_var()
 632        ),
 633        "COMMENT": lambda self: self.expression(
 634            exp.CommentColumnConstraint, this=self._parse_string()
 635        ),
 636        "COMPRESS": lambda self: self._parse_compress(),
 637        "DEFAULT": lambda self: self.expression(
 638            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 639        ),
 640        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 641        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 642        "FORMAT": lambda self: self.expression(
 643            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 644        ),
 645        "GENERATED": lambda self: self._parse_generated_as_identity(),
 646        "IDENTITY": lambda self: self._parse_auto_increment(),
 647        "INLINE": lambda self: self._parse_inline(),
 648        "LIKE": lambda self: self._parse_create_like(),
 649        "NOT": lambda self: self._parse_not_constraint(),
 650        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 651        "ON": lambda self: self._match(TokenType.UPDATE)
 652        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 653        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 654        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 655        "REFERENCES": lambda self: self._parse_references(match=False),
 656        "TITLE": lambda self: self.expression(
 657            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 658        ),
 659        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 660        "UNIQUE": lambda self: self._parse_unique(),
 661        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 662    }
 663
 664    ALTER_PARSERS = {
 665        "ADD": lambda self: self._parse_alter_table_add(),
 666        "ALTER": lambda self: self._parse_alter_table_alter(),
 667        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 668        "DROP": lambda self: self._parse_alter_table_drop(),
 669        "RENAME": lambda self: self._parse_alter_table_rename(),
 670    }
 671
 672    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 673
 674    NO_PAREN_FUNCTION_PARSERS = {
 675        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 676        TokenType.CASE: lambda self: self._parse_case(),
 677        TokenType.IF: lambda self: self._parse_if(),
 678        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 679            exp.NextValueFor,
 680            this=self._parse_column(),
 681            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 682        ),
 683    }
 684
 685    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 686
 687    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 688        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 689        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 690        "DECODE": lambda self: self._parse_decode(),
 691        "EXTRACT": lambda self: self._parse_extract(),
 692        "JSON_OBJECT": lambda self: self._parse_json_object(),
 693        "LOG": lambda self: self._parse_logarithm(),
 694        "MATCH": lambda self: self._parse_match_against(),
 695        "OPENJSON": lambda self: self._parse_open_json(),
 696        "POSITION": lambda self: self._parse_position(),
 697        "SAFE_CAST": lambda self: self._parse_cast(False),
 698        "STRING_AGG": lambda self: self._parse_string_agg(),
 699        "SUBSTRING": lambda self: self._parse_substring(),
 700        "TRIM": lambda self: self._parse_trim(),
 701        "TRY_CAST": lambda self: self._parse_cast(False),
 702        "TRY_CONVERT": lambda self: self._parse_convert(False),
 703    }
 704
 705    QUERY_MODIFIER_PARSERS = {
 706        "joins": lambda self: list(iter(self._parse_join, None)),
 707        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 708        "match": lambda self: self._parse_match_recognize(),
 709        "where": lambda self: self._parse_where(),
 710        "group": lambda self: self._parse_group(),
 711        "having": lambda self: self._parse_having(),
 712        "qualify": lambda self: self._parse_qualify(),
 713        "windows": lambda self: self._parse_window_clause(),
 714        "order": lambda self: self._parse_order(),
 715        "limit": lambda self: self._parse_limit(),
 716        "offset": lambda self: self._parse_offset(),
 717        "locks": lambda self: self._parse_locks(),
 718        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 719    }
 720
    # Dispatch table for the keyword that follows SET (assignment scopes and
    # SET TRANSACTION); the class-level _set_trie is built from these keys.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty here, populated by dialect subclasses
    # (the class-level _show_trie is built from these keys).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Parsers for literals of specific data types; populated by dialect subclasses.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can take query modifiers (WHERE, GROUP BY, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted transaction kinds for BEGIN/START TRANSACTION statements.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Phrases accepted as SET TRANSACTION characteristics.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted as INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CREATE ... CLONE (<kind> => ...) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Identifier tokens allowed as window aliases: all ID_VAR_TOKENS except ROWS.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    # Tokens that may precede a parenthesized window specification.
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    # Keywords naming a window frame side.
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Token types that may start an ADD CONSTRAINT-style clause.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior toggles; subclasses override these as needed.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Fixed attribute set for instances (avoids per-instance __dict__).
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 784
 785    def __init__(
 786        self,
 787        error_level: t.Optional[ErrorLevel] = None,
 788        error_message_context: int = 100,
 789        index_offset: int = 0,
 790        unnest_column_only: bool = False,
 791        alias_post_tablesample: bool = False,
 792        max_errors: int = 3,
 793        null_ordering: t.Optional[str] = None,
 794    ):
 795        self.error_level = error_level or ErrorLevel.IMMEDIATE
 796        self.error_message_context = error_message_context
 797        self.index_offset = index_offset
 798        self.unnest_column_only = unnest_column_only
 799        self.alias_post_tablesample = alias_post_tablesample
 800        self.max_errors = max_errors
 801        self.null_ordering = null_ordering
 802        self.reset()
 803
 804    def reset(self):
 805        self.sql = ""
 806        self.errors = []
 807        self._tokens = []
 808        self._index = 0
 809        self._curr = None
 810        self._next = None
 811        self._prev = None
 812        self._prev_comments = None
 813
 814    def parse(
 815        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 816    ) -> t.List[t.Optional[exp.Expression]]:
 817        """
 818        Parses a list of tokens and returns a list of syntax trees, one tree
 819        per parsed SQL statement.
 820
 821        Args:
 822            raw_tokens: the list of tokens.
 823            sql: the original SQL string, used to produce helpful debug messages.
 824
 825        Returns:
 826            The list of syntax trees.
 827        """
 828        return self._parse(
 829            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 830        )
 831
 832    def parse_into(
 833        self,
 834        expression_types: exp.IntoType,
 835        raw_tokens: t.List[Token],
 836        sql: t.Optional[str] = None,
 837    ) -> t.List[t.Optional[exp.Expression]]:
 838        """
 839        Parses a list of tokens into a given Expression type. If a collection of Expression
 840        types is given instead, this method will try to parse the token list into each one
 841        of them, stopping at the first for which the parsing succeeds.
 842
 843        Args:
 844            expression_types: the expression type(s) to try and parse the token list into.
 845            raw_tokens: the list of tokens.
 846            sql: the original SQL string, used to produce helpful debug messages.
 847
 848        Returns:
 849            The target Expression.
 850        """
 851        errors = []
 852        for expression_type in ensure_collection(expression_types):
 853            parser = self.EXPRESSION_PARSERS.get(expression_type)
 854            if not parser:
 855                raise TypeError(f"No parser registered for {expression_type}")
 856            try:
 857                return self._parse(parser, raw_tokens, sql)
 858            except ParseError as e:
 859                e.errors[0]["into_expression"] = expression_type
 860                errors.append(e)
 861        raise ParseError(
 862            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 863            errors=merge_errors(errors),
 864        ) from errors[-1]
 865
 866    def _parse(
 867        self,
 868        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 869        raw_tokens: t.List[Token],
 870        sql: t.Optional[str] = None,
 871    ) -> t.List[t.Optional[exp.Expression]]:
 872        self.reset()
 873        self.sql = sql or ""
 874        total = len(raw_tokens)
 875        chunks: t.List[t.List[Token]] = [[]]
 876
 877        for i, token in enumerate(raw_tokens):
 878            if token.token_type == TokenType.SEMICOLON:
 879                if i < total - 1:
 880                    chunks.append([])
 881            else:
 882                chunks[-1].append(token)
 883
 884        expressions = []
 885
 886        for tokens in chunks:
 887            self._index = -1
 888            self._tokens = tokens
 889            self._advance()
 890
 891            expressions.append(parse_method(self))
 892
 893            if self._index < len(self._tokens):
 894                self.raise_error("Invalid expression / Unexpected token")
 895
 896            self.check_errors()
 897
 898        return expressions
 899
 900    def check_errors(self) -> None:
 901        """
 902        Logs or raises any found errors, depending on the chosen error level setting.
 903        """
 904        if self.error_level == ErrorLevel.WARN:
 905            for error in self.errors:
 906                logger.error(str(error))
 907        elif self.error_level == ErrorLevel.RAISE and self.errors:
 908            raise ParseError(
 909                concat_messages(self.errors, self.max_errors),
 910                errors=merge_errors(self.errors),
 911            )
 912
 913    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 914        """
 915        Appends an error in the list of recorded errors or raises it, depending on the chosen
 916        error level setting.
 917        """
 918        token = token or self._curr or self._prev or Token.string("")
 919        start = token.start
 920        end = token.end + 1
 921        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 922        highlight = self.sql[start:end]
 923        end_context = self.sql[end : end + self.error_message_context]
 924
 925        error = ParseError.new(
 926            f"{message}. Line {token.line}, Col: {token.col}.\n"
 927            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 928            description=message,
 929            line=token.line,
 930            col=token.col,
 931            start_context=start_context,
 932            highlight=highlight,
 933            end_context=end_context,
 934        )
 935
 936        if self.error_level == ErrorLevel.IMMEDIATE:
 937            raise error
 938
 939        self.errors.append(error)
 940
 941    def expression(
 942        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 943    ) -> E:
 944        """
 945        Creates a new, validated Expression.
 946
 947        Args:
 948            exp_class: the expression class to instantiate.
 949            comments: an optional list of comments to attach to the expression.
 950            kwargs: the arguments to set for the expression along with their respective values.
 951
 952        Returns:
 953            The target expression.
 954        """
 955        instance = exp_class(**kwargs)
 956        instance.add_comments(comments) if comments else self._add_comments(instance)
 957        self.validate_expression(instance)
 958        return instance
 959
 960    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 961        if expression and self._prev_comments:
 962            expression.add_comments(self._prev_comments)
 963            self._prev_comments = None
 964
 965    def validate_expression(
 966        self, expression: exp.Expression, args: t.Optional[t.List] = None
 967    ) -> None:
 968        """
 969        Validates an already instantiated expression, making sure that all its mandatory arguments
 970        are set.
 971
 972        Args:
 973            expression: the expression to validate.
 974            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 975        """
 976        if self.error_level == ErrorLevel.IGNORE:
 977            return
 978
 979        for error_message in expression.error_messages(args):
 980            self.raise_error(error_message)
 981
 982    def _find_sql(self, start: Token, end: Token) -> str:
 983        return self.sql[start.start : end.end + 1]
 984
 985    def _advance(self, times: int = 1) -> None:
 986        self._index += times
 987        self._curr = seq_get(self._tokens, self._index)
 988        self._next = seq_get(self._tokens, self._index + 1)
 989        if self._index > 0:
 990            self._prev = self._tokens[self._index - 1]
 991            self._prev_comments = self._prev.comments
 992        else:
 993            self._prev = None
 994            self._prev_comments = None
 995
 996    def _retreat(self, index: int) -> None:
 997        if index != self._index:
 998            self._advance(index - self._index)
 999
1000    def _parse_command(self) -> exp.Command:
1001        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1002
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parses a COMMENT [IF EXISTS] ON <kind> <object> IS <string> statement.

        Args:
            allow_exists: whether to try matching an IF EXISTS clause first.

        Returns:
            An exp.Comment node, or an exp.Command fallback when the target
            kind is not a known creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # The comment target must be a creatable kind (TABLE, COLUMN, ...).
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            # Unknown target kind: treat the rest of the statement as a raw command.
            return self._parse_as_command(start)

        # Parse the commented-on object according to its kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1028
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression, optionally followed by one action:
            # DELETE | RECOMPRESS <expr> | TO DISK <string> | TO VOLUME <string>.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: return the bare TTL expression.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # Optional SET <aggregations>, only valid after a GROUP BY.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1066
1067    def _parse_statement(self) -> t.Optional[exp.Expression]:
1068        if self._curr is None:
1069            return None
1070
1071        if self._match_set(self.STATEMENT_PARSERS):
1072            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1073
1074        if self._match_set(Tokenizer.COMMANDS):
1075            return self._parse_command()
1076
1077        expression = self._parse_expression()
1078        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1079        return self._parse_query_modifiers(expression)
1080
1081    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1082        start = self._prev
1083        temporary = self._match(TokenType.TEMPORARY)
1084        materialized = self._match_text_seq("MATERIALIZED")
1085        kind = self._match_set(self.CREATABLES) and self._prev.text
1086        if not kind:
1087            return self._parse_as_command(start)
1088
1089        return self.expression(
1090            exp.Drop,
1091            exists=self._parse_exists(),
1092            this=self._parse_table(schema=True),
1093            kind=kind,
1094            temporary=temporary,
1095            materialized=materialized,
1096            cascade=self._match_text_seq("CASCADE"),
1097            constraints=self._match_text_seq("CONSTRAINTS"),
1098            purge=self._match_text_seq("PURGE"),
1099        )
1100
1101    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1102        return (
1103            self._match(TokenType.IF)
1104            and (not not_ or self._match(TokenType.NOT))
1105            and self._match(TokenType.EXISTS)
1106        )
1107
1108    def _parse_create(self) -> t.Optional[exp.Expression]:
1109        start = self._prev
1110        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1111            TokenType.OR, TokenType.REPLACE
1112        )
1113        unique = self._match(TokenType.UNIQUE)
1114
1115        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1116            self._match(TokenType.TABLE)
1117
1118        properties = None
1119        create_token = self._match_set(self.CREATABLES) and self._prev
1120
1121        if not create_token:
1122            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1123            create_token = self._match_set(self.CREATABLES) and self._prev
1124
1125            if not properties or not create_token:
1126                return self._parse_as_command(start)
1127
1128        exists = self._parse_exists(not_=True)
1129        this = None
1130        expression = None
1131        indexes = None
1132        no_schema_binding = None
1133        begin = None
1134        clone = None
1135
1136        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1137            this = self._parse_user_defined_function(kind=create_token.token_type)
1138            temp_properties = self._parse_properties()
1139            if properties and temp_properties:
1140                properties.expressions.extend(temp_properties.expressions)
1141            elif temp_properties:
1142                properties = temp_properties
1143
1144            self._match(TokenType.ALIAS)
1145            begin = self._match(TokenType.BEGIN)
1146            return_ = self._match_text_seq("RETURN")
1147            expression = self._parse_statement()
1148
1149            if return_:
1150                expression = self.expression(exp.Return, this=expression)
1151        elif create_token.token_type == TokenType.INDEX:
1152            this = self._parse_index(index=self._parse_id_var())
1153        elif create_token.token_type in self.DB_CREATABLES:
1154            table_parts = self._parse_table_parts(schema=True)
1155
1156            # exp.Properties.Location.POST_NAME
1157            if self._match(TokenType.COMMA):
1158                temp_properties = self._parse_properties(before=True)
1159                if properties and temp_properties:
1160                    properties.expressions.extend(temp_properties.expressions)
1161                elif temp_properties:
1162                    properties = temp_properties
1163
1164            this = self._parse_schema(this=table_parts)
1165
1166            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1167            temp_properties = self._parse_properties()
1168            if properties and temp_properties:
1169                properties.expressions.extend(temp_properties.expressions)
1170            elif temp_properties:
1171                properties = temp_properties
1172
1173            self._match(TokenType.ALIAS)
1174
1175            # exp.Properties.Location.POST_ALIAS
1176            if not (
1177                self._match(TokenType.SELECT, advance=False)
1178                or self._match(TokenType.WITH, advance=False)
1179                or self._match(TokenType.L_PAREN, advance=False)
1180            ):
1181                temp_properties = self._parse_properties()
1182                if properties and temp_properties:
1183                    properties.expressions.extend(temp_properties.expressions)
1184                elif temp_properties:
1185                    properties = temp_properties
1186
1187            expression = self._parse_ddl_select()
1188
1189            if create_token.token_type == TokenType.TABLE:
1190                indexes = []
1191                while True:
1192                    index = self._parse_index()
1193
1194                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1195                    temp_properties = self._parse_properties()
1196                    if properties and temp_properties:
1197                        properties.expressions.extend(temp_properties.expressions)
1198                    elif temp_properties:
1199                        properties = temp_properties
1200
1201                    if not index:
1202                        break
1203                    else:
1204                        self._match(TokenType.COMMA)
1205                        indexes.append(index)
1206            elif create_token.token_type == TokenType.VIEW:
1207                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1208                    no_schema_binding = True
1209
1210            if self._match_text_seq("CLONE"):
1211                clone = self._parse_table(schema=True)
1212                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1213                clone_kind = (
1214                    self._match(TokenType.L_PAREN)
1215                    and self._match_texts(self.CLONE_KINDS)
1216                    and self._prev.text.upper()
1217                )
1218                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1219                self._match(TokenType.R_PAREN)
1220                clone = self.expression(
1221                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1222                )
1223
1224        return self.expression(
1225            exp.Create,
1226            this=this,
1227            kind=create_token.text,
1228            replace=replace,
1229            unique=unique,
1230            expression=expression,
1231            exists=exists,
1232            properties=properties,
1233            indexes=indexes,
1234            no_schema_binding=no_schema_binding,
1235            begin=begin,
1236            clone=clone,
1237        )
1238
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Each entry consumes its keyword(s) when present; dict literals evaluate
        # their values in source order, so the matches run strictly left-to-right.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifiers that actually matched; parsers that
                # don't accept a given modifier raise TypeError, reported below.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1263
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Attempts to parse a single property; returns None when nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            # Dispatch to the dedicated parser registered for this keyword.
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment where the key is an identifier or string;
        # advance=False peeks so nothing is consumed unless both tokens line up.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1287
1288    def _parse_stored(self) -> exp.Expression:
1289        self._match(TokenType.ALIAS)
1290
1291        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1292        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1293
1294        return self.expression(
1295            exp.FileFormatProperty,
1296            this=self.expression(
1297                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1298            )
1299            if input_format or output_format
1300            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1301        )
1302
1303    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1304        self._match(TokenType.EQ)
1305        self._match(TokenType.ALIAS)
1306        return self.expression(exp_class, this=self._parse_field())
1307
1308    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1309        properties = []
1310
1311        while True:
1312            if before:
1313                prop = self._parse_property_before()
1314            else:
1315                prop = self._parse_property()
1316
1317            if not prop:
1318                break
1319            for p in ensure_list(prop):
1320                properties.append(p)
1321
1322        if properties:
1323            return self.expression(exp.Properties, expressions=properties)
1324
1325        return None
1326
1327    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1328        return self.expression(
1329            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1330        )
1331
1332    def _parse_volatile_property(self) -> exp.Expression:
1333        if self._index >= 2:
1334            pre_volatile_token = self._tokens[self._index - 2]
1335        else:
1336            pre_volatile_token = None
1337
1338        if pre_volatile_token and pre_volatile_token.token_type in (
1339            TokenType.CREATE,
1340            TokenType.REPLACE,
1341            TokenType.UNIQUE,
1342        ):
1343            return exp.VolatileProperty()
1344
1345        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1346
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the property (or parenthesized property list) following WITH."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, prop, ...) — a parenthesized, comma-separated list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        # Nothing recognizable follows and the stream is about to end.
        if not self._next:
            return None

        return self._parse_withisolatedloading()
1366
1367    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1368    def _parse_definer(self) -> t.Optional[exp.Expression]:
1369        self._match(TokenType.EQ)
1370
1371        user = self._parse_id_var()
1372        self._match(TokenType.PARAMETER)
1373        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1374
1375        if not user or not host:
1376            return None
1377
1378        return exp.DefinerProperty(this=f"{user}@{host}")
1379
1380    def _parse_withjournaltable(self) -> exp.Expression:
1381        self._match(TokenType.TABLE)
1382        self._match(TokenType.EQ)
1383        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1384
    def _parse_log(self, no: bool = False) -> exp.Expression:
        # Builds a [NO] LOG property; the keyword itself was consumed by the caller.
        return self.expression(exp.LogProperty, no=no)
1387
    def _parse_journal(self, **kwargs) -> exp.Expression:
        # Builds a JOURNAL property from modifiers collected by _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)
1390
1391    def _parse_checksum(self) -> exp.Expression:
1392        self._match(TokenType.EQ)
1393
1394        on = None
1395        if self._match(TokenType.ON):
1396            on = True
1397        elif self._match_text_seq("OFF"):
1398            on = False
1399        default = self._match(TokenType.DEFAULT)
1400
1401        return self.expression(
1402            exp.ChecksumProperty,
1403            on=on,
1404            default=default,
1405        )
1406
1407    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1408        if not self._match_text_seq("BY"):
1409            self._retreat(self._index - 1)
1410            return None
1411        return self.expression(
1412            exp.Cluster,
1413            expressions=self._parse_csv(self._parse_ordered),
1414        )
1415
1416    def _parse_freespace(self) -> exp.Expression:
1417        self._match(TokenType.EQ)
1418        return self.expression(
1419            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1420        )
1421
1422    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1423        if self._match(TokenType.EQ):
1424            return self.expression(
1425                exp.MergeBlockRatioProperty,
1426                this=self._parse_number(),
1427                percent=self._match(TokenType.PERCENT),
1428            )
1429        return self.expression(
1430            exp.MergeBlockRatioProperty,
1431            no=no,
1432            default=default,
1433        )
1434
1435    def _parse_datablocksize(
1436        self,
1437        default: t.Optional[bool] = None,
1438        minimum: t.Optional[bool] = None,
1439        maximum: t.Optional[bool] = None,
1440    ) -> exp.Expression:
1441        self._match(TokenType.EQ)
1442        size = self._parse_number()
1443        units = None
1444        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1445            units = self._prev.text
1446        return self.expression(
1447            exp.DataBlocksizeProperty,
1448            size=size,
1449            units=units,
1450            default=default,
1451            minimum=minimum,
1452            maximum=maximum,
1453        )
1454
1455    def _parse_blockcompression(self) -> exp.Expression:
1456        self._match(TokenType.EQ)
1457        always = self._match_text_seq("ALWAYS")
1458        manual = self._match_text_seq("MANUAL")
1459        never = self._match_text_seq("NEVER")
1460        default = self._match_text_seq("DEFAULT")
1461        autotemp = None
1462        if self._match_text_seq("AUTOTEMP"):
1463            autotemp = self._parse_schema()
1464
1465        return self.expression(
1466            exp.BlockCompressionProperty,
1467            always=always,
1468            manual=manual,
1469            never=never,
1470            default=default,
1471            autotemp=autotemp,
1472        )
1473
1474    def _parse_withisolatedloading(self) -> exp.Expression:
1475        no = self._match_text_seq("NO")
1476        concurrent = self._match_text_seq("CONCURRENT")
1477        self._match_text_seq("ISOLATED", "LOADING")
1478        for_all = self._match_text_seq("FOR", "ALL")
1479        for_insert = self._match_text_seq("FOR", "INSERT")
1480        for_none = self._match_text_seq("FOR", "NONE")
1481        return self.expression(
1482            exp.IsolatedLoadingProperty,
1483            no=no,
1484            concurrent=concurrent,
1485            for_all=for_all,
1486            for_insert=for_insert,
1487            for_none=for_none,
1488        )
1489
    def _parse_locking(self) -> exp.Expression:
        """
        Parses a LOCKING clause: an optional object kind (and name), FOR/IN,
        a lock type, and an optional OVERRIDE, into an exp.LockingProperty.
        """
        # Which kind of object is being locked, if stated.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Named kinds are followed by the object's (possibly qualified) name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # The lock type keyword; checks must run in this order since each
        # successful match consumes tokens.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1539
1540    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1541        if self._match(TokenType.PARTITION_BY):
1542            return self._parse_csv(self._parse_conjunction)
1543        return []
1544
1545    def _parse_partitioned_by(self) -> exp.Expression:
1546        self._match(TokenType.EQ)
1547        return self.expression(
1548            exp.PartitionedByProperty,
1549            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1550        )
1551
1552    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1553        if self._match_text_seq("AND", "STATISTICS"):
1554            statistics = True
1555        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1556            statistics = False
1557        else:
1558            statistics = None
1559
1560        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1561
1562    def _parse_no_property(self) -> t.Optional[exp.Property]:
1563        if self._match_text_seq("PRIMARY", "INDEX"):
1564            return exp.NoPrimaryIndexProperty()
1565        return None
1566
1567    def _parse_on_property(self) -> t.Optional[exp.Property]:
1568        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1569            return exp.OnCommitProperty()
1570        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1571            return exp.OnCommitProperty(delete=True)
1572        return None
1573
1574    def _parse_distkey(self) -> exp.Expression:
1575        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1576
1577    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1578        table = self._parse_table(schema=True)
1579        options = []
1580        while self._match_texts(("INCLUDING", "EXCLUDING")):
1581            this = self._prev.text.upper()
1582            id_var = self._parse_id_var()
1583
1584            if not id_var:
1585                return None
1586
1587            options.append(
1588                self.expression(
1589                    exp.Property,
1590                    this=this,
1591                    value=exp.Var(this=id_var.this.upper()),
1592                )
1593            )
1594        return self.expression(exp.LikeProperty, this=table, expressions=options)
1595
1596    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1597        return self.expression(
1598            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1599        )
1600
1601    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1602        self._match(TokenType.EQ)
1603        return self.expression(
1604            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1605        )
1606
1607    def _parse_returns(self) -> exp.Expression:
1608        value: t.Optional[exp.Expression]
1609        is_table = self._match(TokenType.TABLE)
1610
1611        if is_table:
1612            if self._match(TokenType.LT):
1613                value = self.expression(
1614                    exp.Schema,
1615                    this="TABLE",
1616                    expressions=self._parse_csv(self._parse_struct_types),
1617                )
1618                if not self._match(TokenType.GT):
1619                    self.raise_error("Expecting >")
1620            else:
1621                value = self._parse_schema(exp.Var(this="TABLE"))
1622        else:
1623            value = self._parse_types()
1624
1625        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1626
1627    def _parse_describe(self) -> exp.Expression:
1628        kind = self._match_set(self.CREATABLES) and self._prev.text
1629        this = self._parse_table()
1630
1631        return self.expression(exp.Describe, this=this, kind=kind)
1632
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement body.

        Handles both INSERT [OVERWRITE] [LOCAL] DIRECTORY targets (Hive-style)
        and regular INSERT [OR <alternative>] INTO <table> forms.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Insert into a filesystem directory rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE / OR IGNORE alternatives.
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: the keyword-argument order below is also the token consumption
        # order for the remaining clauses — do not reorder.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1664
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT / ON DUPLICATE KEY upsert clauses.

        Returns None when neither introducer is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a key/value list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1698
1699    def _parse_returning(self) -> t.Optional[exp.Expression]:
1700        if not self._match(TokenType.RETURNING):
1701            return None
1702
1703        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1704
1705    def _parse_row(self) -> t.Optional[exp.Expression]:
1706        if not self._match(TokenType.FORMAT):
1707            return None
1708        return self._parse_row_format()
1709
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT clause: SERDE '<name>' or DELIMITED options.

        Args:
            match_row: when True, require a leading ROW FORMAT token pair and
                return None if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED option is optional, but when present they must appear
        # in exactly this order — the matches consume tokens sequentially.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1735
    def _parse_load(self) -> exp.Expression:
        """Parse a LOAD DATA statement; anything else after LOAD falls back to
        being parsed as an opaque command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # NOTE: the keyword-argument order below is also the token
            # consumption order (table, partition, INPUTFORMAT, SERDE).
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1755
1756    def _parse_delete(self) -> exp.Expression:
1757        self._match(TokenType.FROM)
1758
1759        return self.expression(
1760            exp.Delete,
1761            this=self._parse_table(),
1762            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1763            where=self._parse_where(),
1764            returning=self._parse_returning(),
1765        )
1766
1767    def _parse_update(self) -> exp.Expression:
1768        return self.expression(
1769            exp.Update,
1770            **{  # type: ignore
1771                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1772                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1773                "from": self._parse_from(modifiers=True),
1774                "where": self._parse_where(),
1775                "returning": self._parse_returning(),
1776            },
1777        )
1778
1779    def _parse_uncache(self) -> exp.Expression:
1780        if not self._match(TokenType.TABLE):
1781            self.raise_error("Expecting TABLE after UNCACHE")
1782
1783        return self.expression(
1784            exp.Uncache,
1785            exists=self._parse_exists(),
1786            this=self._parse_table(schema=True),
1787        )
1788
1789    def _parse_cache(self) -> exp.Expression:
1790        lazy = self._match_text_seq("LAZY")
1791        self._match(TokenType.TABLE)
1792        table = self._parse_table(schema=True)
1793        options = []
1794
1795        if self._match_text_seq("OPTIONS"):
1796            self._match_l_paren()
1797            k = self._parse_string()
1798            self._match(TokenType.EQ)
1799            v = self._parse_string()
1800            options = [k, v]
1801            self._match_r_paren()
1802
1803        self._match(TokenType.ALIAS)
1804        return self.expression(
1805            exp.Cache,
1806            this=table,
1807            lazy=lazy,
1808            options=options,
1809            expression=self._parse_select(nested=True),
1810        )
1811
1812    def _parse_partition(self) -> t.Optional[exp.Expression]:
1813        if not self._match(TokenType.PARTITION):
1814            return None
1815
1816        return self.expression(
1817            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1818        )
1819
1820    def _parse_value(self) -> exp.Expression:
1821        if self._match(TokenType.L_PAREN):
1822            expressions = self._parse_csv(self._parse_conjunction)
1823            self._match_r_paren()
1824            return self.expression(exp.Tuple, expressions=expressions)
1825
1826        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1827        # Source: https://prestodb.io/docs/current/sql/values.html
1828        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1829
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statements, plain SELECT,
        parenthesized subqueries, VALUES lists, simplified PIVOTs and the
        FROM-first shorthand.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: parse a trailing alias on a parenthesized subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # Only reached when raise_error doesn't raise (lenient error level).
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            # FROM-first shorthand: FROM t is treated as SELECT * FROM t.
            this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True)))
        else:
            this = None

        return self._parse_set_operations(this)
1913
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        Args:
            skip_with_token: assume the WITH token was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither a comma nor another WITH separates the CTEs.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # After a comma, tolerate a redundant WITH before the next CTE.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1933
1934    def _parse_cte(self) -> exp.Expression:
1935        alias = self._parse_table_alias()
1936        if not alias or not alias.this:
1937            self.raise_error("Expected CTE to have alias")
1938
1939        self._match(TokenType.ALIAS)
1940
1941        return self.expression(
1942            exp.CTE,
1943            this=self._parse_wrapped(self._parse_statement),
1944            alias=alias,
1945        )
1946
1947    def _parse_table_alias(
1948        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1949    ) -> t.Optional[exp.Expression]:
1950        any_token = self._match(TokenType.ALIAS)
1951        alias = (
1952            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1953            or self._parse_string_as_identifier()
1954        )
1955
1956        index = self._index
1957        if self._match(TokenType.L_PAREN):
1958            columns = self._parse_csv(self._parse_function_parameter)
1959            self._match_r_paren() if columns else self._retreat(index)
1960        else:
1961            columns = None
1962
1963        if not alias and not columns:
1964            return None
1965
1966        return self.expression(exp.TableAlias, this=alias, columns=columns)
1967
1968    def _parse_subquery(
1969        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1970    ) -> t.Optional[exp.Expression]:
1971        if not this:
1972            return None
1973        return self.expression(
1974            exp.Subquery,
1975            this=this,
1976            pivots=self._parse_pivots(),
1977            alias=self._parse_table_alias() if parse_alias else None,
1978        )
1979
1980    def _parse_query_modifiers(
1981        self, this: t.Optional[exp.Expression]
1982    ) -> t.Optional[exp.Expression]:
1983        if isinstance(this, self.MODIFIABLES):
1984            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1985                expression = parser(self)
1986
1987                if expression:
1988                    this.set(key, expression)
1989        return this
1990
1991    def _parse_hint(self) -> t.Optional[exp.Expression]:
1992        if self._match(TokenType.HINT):
1993            hints = self._parse_csv(self._parse_function)
1994            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1995                self.raise_error("Expected */ after HINT")
1996            return self.expression(exp.Hint, expressions=hints)
1997
1998        return None
1999
2000    def _parse_into(self) -> t.Optional[exp.Expression]:
2001        if not self._match(TokenType.INTO):
2002            return None
2003
2004        temp = self._match(TokenType.TEMPORARY)
2005        unlogged = self._match_text_seq("UNLOGGED")
2006        self._match(TokenType.TABLE)
2007
2008        return self.expression(
2009            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2010        )
2011
2012    def _parse_from(
2013        self, modifiers: bool = False, skip_from_token: bool = False
2014    ) -> t.Optional[exp.From]:
2015        if not skip_from_token and not self._match(TokenType.FROM):
2016            return None
2017
2018        comments = self._prev_comments
2019        this = self._parse_table()
2020
2021        return self.expression(
2022            exp.From,
2023            comments=comments,
2024            this=self._parse_query_modifiers(this) if modifiers else this,
2025        )
2026
2027    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2028        if not self._match(TokenType.MATCH_RECOGNIZE):
2029            return None
2030
2031        self._match_l_paren()
2032
2033        partition = self._parse_partition_by()
2034        order = self._parse_order()
2035        measures = (
2036            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2037        )
2038
2039        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2040            rows = exp.Var(this="ONE ROW PER MATCH")
2041        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2042            text = "ALL ROWS PER MATCH"
2043            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2044                text += f" SHOW EMPTY MATCHES"
2045            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2046                text += f" OMIT EMPTY MATCHES"
2047            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2048                text += f" WITH UNMATCHED ROWS"
2049            rows = exp.Var(this=text)
2050        else:
2051            rows = None
2052
2053        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2054            text = "AFTER MATCH SKIP"
2055            if self._match_text_seq("PAST", "LAST", "ROW"):
2056                text += f" PAST LAST ROW"
2057            elif self._match_text_seq("TO", "NEXT", "ROW"):
2058                text += f" TO NEXT ROW"
2059            elif self._match_text_seq("TO", "FIRST"):
2060                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2061            elif self._match_text_seq("TO", "LAST"):
2062                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2063            after = exp.Var(this=text)
2064        else:
2065            after = None
2066
2067        if self._match_text_seq("PATTERN"):
2068            self._match_l_paren()
2069
2070            if not self._curr:
2071                self.raise_error("Expecting )", self._curr)
2072
2073            paren = 1
2074            start = self._curr
2075
2076            while self._curr and paren > 0:
2077                if self._curr.token_type == TokenType.L_PAREN:
2078                    paren += 1
2079                if self._curr.token_type == TokenType.R_PAREN:
2080                    paren -= 1
2081                end = self._prev
2082                self._advance()
2083            if paren > 0:
2084                self.raise_error("Expecting )", self._curr)
2085            pattern = exp.Var(this=self._find_sql(start, end))
2086        else:
2087            pattern = None
2088
2089        define = (
2090            self._parse_csv(
2091                lambda: self.expression(
2092                    exp.Alias,
2093                    alias=self._parse_id_var(any_token=True),
2094                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2095                )
2096            )
2097            if self._match_text_seq("DEFINE")
2098            else None
2099        )
2100
2101        self._match_r_paren()
2102
2103        return self.expression(
2104            exp.MatchRecognize,
2105            partition_by=partition,
2106            order=order,
2107            measures=measures,
2108            rows=rows,
2109            after=after,
2110            pattern=pattern,
2111            define=define,
2112            alias=self._parse_table_alias(),
2113        )
2114
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions.

        Returns None when none of the introducing keywords are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is represented as an outer lateral; CROSS APPLY is not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW alias form: "table [AS] col1, col2, ...".
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2156
2157    def _parse_join_side_and_kind(
2158        self,
2159    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2160        return (
2161            self._match(TokenType.NATURAL) and self._prev,
2162            self._match_set(self.JOIN_SIDES) and self._prev,
2163            self._match_set(self.JOIN_KINDS) and self._prev,
2164        )
2165
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and APPLY forms.

        Args:
            skip_join_token: treat the join keyword as already consumed.

        Returns None when the upcoming tokens do not form a join.
        """
        if self._match(TokenType.COMMA):
            # Implicit join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; rewind past any NATURAL/side/kind tokens.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is normalized to a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2209
2210    def _parse_index(
2211        self,
2212        index: t.Optional[exp.Expression] = None,
2213    ) -> t.Optional[exp.Expression]:
2214        if index:
2215            unique = None
2216            primary = None
2217            amp = None
2218
2219            self._match(TokenType.ON)
2220            self._match(TokenType.TABLE)  # hive
2221            table = self._parse_table_parts(schema=True)
2222        else:
2223            unique = self._match(TokenType.UNIQUE)
2224            primary = self._match_text_seq("PRIMARY")
2225            amp = self._match_text_seq("AMP")
2226            if not self._match(TokenType.INDEX):
2227                return None
2228            index = self._parse_id_var()
2229            table = None
2230
2231        if self._match(TokenType.L_PAREN, advance=False):
2232            columns = self._parse_wrapped_csv(self._parse_ordered)
2233        else:
2234            columns = None
2235
2236        return self.expression(
2237            exp.Index,
2238            this=index,
2239            table=table,
2240            columns=columns,
2241            unique=unique,
2242            primary=primary,
2243            amp=amp,
2244            partition_by=self._parse_partition_by(),
2245        )
2246
2247    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2248        return (
2249            (not schema and self._parse_function())
2250            or self._parse_id_var(any_token=False)
2251            or self._parse_string_as_identifier()
2252            or self._parse_placeholder()
2253        )
2254
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dot-separated table reference: [catalog.][db.]table[.more].

        Raises a parse error when no table name could be parsed.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: the previous db becomes the catalog and the
                # previous table becomes the db.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2277
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral, unnest, VALUES, subquery or a
        plain (possibly aliased, sampled, hinted or pivoted) table reference.

        Args:
            schema: parse the table reference as a schema definition.
            alias_tokens: token types permitted as the alias; defaults to
                TABLE_ALIAS_TOKENS.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # The dialect flag controls whether the sample clause is parsed before
        # or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints of the form WITH (hint, ...).
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        return this
2329
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table expression with optional ordinality,
        alias and WITH OFFSET clause; None when UNNEST is absent.
        """
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only dialects the alias names the produced column, not
            # the table, so move it from "this" to "columns".
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default offset column name when no explicit alias is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2356
2357    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2358        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2359        if not is_derived and not self._match(TokenType.VALUES):
2360            return None
2361
2362        expressions = self._parse_csv(self._parse_value)
2363
2364        if is_derived:
2365            self._match_r_paren()
2366
2367        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2368
2369    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2370        if not self._match(TokenType.TABLE_SAMPLE) and not (
2371            as_modifier and self._match_text_seq("USING", "SAMPLE")
2372        ):
2373            return None
2374
2375        bucket_numerator = None
2376        bucket_denominator = None
2377        bucket_field = None
2378        percent = None
2379        rows = None
2380        size = None
2381        seed = None
2382
2383        kind = (
2384            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2385        )
2386        method = self._parse_var(tokens=(TokenType.ROW,))
2387
2388        self._match(TokenType.L_PAREN)
2389
2390        num = self._parse_number()
2391
2392        if self._match_text_seq("BUCKET"):
2393            bucket_numerator = self._parse_number()
2394            self._match_text_seq("OUT", "OF")
2395            bucket_denominator = bucket_denominator = self._parse_number()
2396            self._match(TokenType.ON)
2397            bucket_field = self._parse_field()
2398        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2399            percent = num
2400        elif self._match(TokenType.ROWS):
2401            rows = num
2402        else:
2403            size = num
2404
2405        self._match(TokenType.R_PAREN)
2406
2407        if self._match(TokenType.L_PAREN):
2408            method = self._parse_var()
2409            seed = self._match(TokenType.COMMA) and self._parse_number()
2410            self._match_r_paren()
2411        elif self._match_texts(("SEED", "REPEATABLE")):
2412            seed = self._parse_wrapped(self._parse_number)
2413
2414        return self.expression(
2415            exp.TableSample,
2416            method=method,
2417            bucket_numerator=bucket_numerator,
2418            bucket_denominator=bucket_denominator,
2419            bucket_field=bucket_field,
2420            percent=percent,
2421            rows=rows,
2422            size=size,
2423            seed=seed,
2424            kind=kind,
2425        )
2426
2427    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2428        return list(iter(self._parse_pivot, None))
2429
2430    # https://duckdb.org/docs/sql/statements/pivot
2431    def _parse_simplified_pivot(self) -> exp.Pivot:
2432        def _parse_on() -> t.Optional[exp.Expression]:
2433            this = self._parse_bitwise()
2434            return self._parse_in(this) if self._match(TokenType.IN) else this
2435
2436        this = self._parse_table()
2437        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2438        using = self._match(TokenType.USING) and self._parse_csv(
2439            lambda: self._parse_alias(self._parse_function())
2440        )
2441        group = self._parse_group()
2442        return self.expression(
2443            exp.Pivot, this=this, expressions=expressions, using=using, group=group
2444        )
2445
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT/UNPIVOT clause, returning None (and rewinding) if it isn't one."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT/UNPIVOT must be followed by a parenthesized body; otherwise the
        # keyword was something else entirely, so rewind and bail out.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # The IN list may carry aliases for the pivoted values.
        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias when another PIVOT/UNPIVOT doesn't follow immediately.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot produces, combining each
            # aggregation's name with each pivoted field value (order depends on
            # PREFIXED_PIVOT_COLUMNS, quoting on IDENTIFY_PIVOT_STRINGS).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2505
2506    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2507        return [agg.alias for agg in aggregations]
2508
2509    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2510        if not skip_where_token and not self._match(TokenType.WHERE):
2511            return None
2512
2513        return self.expression(
2514            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2515        )
2516
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS modifiers into a single exp.Group node.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each kind of grouping element across loop iterations.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE use the bare keyword form (value True);
            # otherwise ROLLUP(...) / CUBE(...) carry explicit column lists.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while modifiers keep appearing; a round that matched
            # nothing beyond plain expressions ends the clause.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2553
2554    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2555        if not self._match(TokenType.GROUPING_SETS):
2556            return None
2557
2558        return self._parse_wrapped_csv(self._parse_grouping_set)
2559
2560    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2561        if self._match(TokenType.L_PAREN):
2562            grouping_set = self._parse_csv(self._parse_column)
2563            self._match_r_paren()
2564            return self.expression(exp.Tuple, expressions=grouping_set)
2565
2566        return self._parse_column()
2567
2568    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2569        if not skip_having_token and not self._match(TokenType.HAVING):
2570            return None
2571        return self.expression(exp.Having, this=self._parse_conjunction())
2572
2573    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2574        if not self._match(TokenType.QUALIFY):
2575            return None
2576        return self.expression(exp.Qualify, this=self._parse_conjunction())
2577
2578    def _parse_order(
2579        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2580    ) -> t.Optional[exp.Expression]:
2581        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2582            return this
2583
2584        return self.expression(
2585            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2586        )
2587
2588    def _parse_sort(
2589        self, exp_class: t.Type[exp.Expression], *texts: str
2590    ) -> t.Optional[exp.Expression]:
2591        if not self._match_text_seq(*texts):
2592            return None
2593        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2594
2595    def _parse_ordered(self) -> exp.Expression:
2596        this = self._parse_conjunction()
2597        self._match(TokenType.ASC)
2598        is_desc = self._match(TokenType.DESC)
2599        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2600        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2601        desc = is_desc or False
2602        asc = not desc
2603        nulls_first = is_nulls_first or False
2604        explicitly_null_ordered = is_nulls_first or is_nulls_last
2605        if (
2606            not explicitly_null_ordered
2607            and (
2608                (asc and self.null_ordering == "nulls_are_small")
2609                or (desc and self.null_ordering != "nulls_are_small")
2610            )
2611            and self.null_ordering != "nulls_are_last"
2612        ):
2613            nulls_first = True
2614
2615        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2616
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, or an ANSI FETCH clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # T-SQL allows TOP (n) with parentheses.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            # ROW / ROWS is optional noise.
            self._match_set((TokenType.ROW, TokenType.ROWS))

            # ONLY is the default; it's parsed solely to reject the invalid
            # combination with WITH TIES below.
            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2655
2656    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2657        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2658            return this
2659
2660        count = self._parse_number()
2661        self._match_set((TokenType.ROW, TokenType.ROWS))
2662        return self.expression(exp.Offset, this=this, expression=count)
2663
2664    def _parse_locks(self) -> t.List[exp.Expression]:
2665        # Lists are invariant, so we need to use a type hint here
2666        locks: t.List[exp.Expression] = []
2667
2668        while True:
2669            if self._match_text_seq("FOR", "UPDATE"):
2670                update = True
2671            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2672                "LOCK", "IN", "SHARE", "MODE"
2673            ):
2674                update = False
2675            else:
2676                break
2677
2678            expressions = None
2679            if self._match_text_seq("OF"):
2680                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2681
2682            wait: t.Optional[bool | exp.Expression] = None
2683            if self._match_text_seq("NOWAIT"):
2684                wait = True
2685            elif self._match_text_seq("WAIT"):
2686                wait = self._parse_primary()
2687            elif self._match_text_seq("SKIP", "LOCKED"):
2688                wait = False
2689
2690            locks.append(
2691                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2692            )
2693
2694        return locks
2695
2696    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2697        if not self._match_set(self.SET_OPERATIONS):
2698            return this
2699
2700        token_type = self._prev.token_type
2701
2702        if token_type == TokenType.UNION:
2703            expression = exp.Union
2704        elif token_type == TokenType.EXCEPT:
2705            expression = exp.Except
2706        else:
2707            expression = exp.Intersect
2708
2709        return self.expression(
2710            expression,
2711            this=this,
2712            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2713            expression=self._parse_set_operations(self._parse_select(nested=True)),
2714        )
2715
2716    def _parse_expression(self) -> t.Optional[exp.Expression]:
2717        return self._parse_alias(self._parse_conjunction())
2718
2719    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2720        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2721
2722    def _parse_equality(self) -> t.Optional[exp.Expression]:
2723        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2724
2725    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2726        return self._parse_tokens(self._parse_range, self.COMPARISON)
2727
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ISNULL, NOTNULL, IS ...)
        attached to a bitwise expression, honoring a leading NOT.
        """
        this = self._parse_bitwise()
        # A NOT here negates the range predicate that follows (e.g. NOT BETWEEN).
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The registered parser declined; fall back to the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2754
2755    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2756        index = self._index - 1
2757        negate = self._match(TokenType.NOT)
2758        if self._match_text_seq("DISTINCT", "FROM"):
2759            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2760            return self.expression(klass, this=this, expression=self._parse_expression())
2761
2762        expression = self._parse_null() or self._parse_boolean()
2763        if not expression:
2764            self._retreat(index)
2765            return None
2766
2767        this = self.expression(exp.Is, this=this, expression=expression)
2768        return self.expression(exp.Not, this=this) if negate else this
2769
2770    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
2771        unnest = self._parse_unnest()
2772        if unnest:
2773            this = self.expression(exp.In, this=this, unnest=unnest)
2774        elif self._match(TokenType.L_PAREN):
2775            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2776
2777            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2778                this = self.expression(exp.In, this=this, query=expressions[0])
2779            else:
2780                this = self.expression(exp.In, this=this, expressions=expressions)
2781
2782            self._match_r_paren(this)
2783        else:
2784            this = self.expression(exp.In, this=this, field=self._parse_field())
2785
2786        return this
2787
2788    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2789        low = self._parse_bitwise()
2790        self._match(TokenType.AND)
2791        high = self._parse_bitwise()
2792        return self.expression(exp.Between, this=this, low=low, high=high)
2793
2794    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2795        if not self._match(TokenType.ESCAPE):
2796            return this
2797        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2798
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing it to the `INTERVAL '<n>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                # The string already carries both value and unit, e.g. '5 day'.
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
2823
2824    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2825        this = self._parse_term()
2826
2827        while True:
2828            if self._match_set(self.BITWISE):
2829                this = self.expression(
2830                    self.BITWISE[self._prev.token_type],
2831                    this=this,
2832                    expression=self._parse_term(),
2833                )
2834            elif self._match_pair(TokenType.LT, TokenType.LT):
2835                this = self.expression(
2836                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2837                )
2838            elif self._match_pair(TokenType.GT, TokenType.GT):
2839                this = self.expression(
2840                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2841                )
2842            else:
2843                break
2844
2845        return this
2846
2847    def _parse_term(self) -> t.Optional[exp.Expression]:
2848        return self._parse_tokens(self._parse_factor, self.TERM)
2849
2850    def _parse_factor(self) -> t.Optional[exp.Expression]:
2851        return self._parse_tokens(self._parse_unary, self.FACTOR)
2852
2853    def _parse_unary(self) -> t.Optional[exp.Expression]:
2854        if self._match_set(self.UNARY_PARSERS):
2855            return self.UNARY_PARSERS[self._prev.token_type](self)
2856        return self._parse_at_time_zone(self._parse_type())
2857
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: an interval, a typed
        literal (implicit cast), a parameterized type, or a plain column.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' becomes CAST('2020-01-01' AS DATE), unless
                # the dialect registers a dedicated parser for this type literal.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name with no parameters was likely an identifier; rewind.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
2879
2880    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2881        this = self._parse_type()
2882        if not this:
2883            return None
2884
2885        return self.expression(
2886            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2887        )
2888
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested or parameterized) data type into an `exp.DataType`.

        Returns None and rewinds the token stream when what follows isn't a type.
        When `check_func` is True, parenthesized forms that could also be function
        calls (e.g. DATE(...)) are rejected unless followed by a string literal,
        so the caller can parse them as functions instead.
        """
        index = self._index

        # Teradata allows types to be prefixed with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A type name followed by parenthesized args may actually be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ... — each extra [] nests
            # another ARRAY level.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # An unpaired `[` means this wasn't a type (e.g. an index expression).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types: ARRAY<INT>, MAP<K, V>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional constructor-style value list after the type.
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so treat this as a function call: rewind
                # fully and let the caller parse it as one.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
3002
3003    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3004        this = self._parse_type() or self._parse_id_var()
3005        self._match(TokenType.COLON)
3006        return self._parse_column_def(this)
3007
3008    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3009        if not self._match_text_seq("AT", "TIME", "ZONE"):
3010            return this
3011        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3012
3013    def _parse_column(self) -> t.Optional[exp.Expression]:
3014        this = self._parse_field()
3015        if isinstance(this, exp.Identifier):
3016            this = self.expression(exp.Column, this=this)
3017        elif not this:
3018            return self._parse_bracket(this)
3019        return self._parse_column_ops(this)
3020
    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Parse trailing column operators (dots, ::casts, brackets) onto `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Registered operator: consume the next token as its literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                # Plain dot access: star, nested function call, or identifier.
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one slot: this.table -> db, this.db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
3067
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a parenthesized expression/tuple,
        or a parenthesized subquery. Returns None if nothing matches.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # A leading-dot number like `.25` becomes 0.25.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3111
3112    def _parse_field(
3113        self,
3114        any_token: bool = False,
3115        tokens: t.Optional[t.Collection[TokenType]] = None,
3116    ) -> t.Optional[exp.Expression]:
3117        return (
3118            self._parse_primary()
3119            or self._parse_function()
3120            or self._parse_id_var(any_token=any_token, tokens=tokens)
3121        )
3122
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, optionally against a custom `functions` registry.

        When `anonymous` is True, the call bypasses both FUNCTION_PARSERS and the
        known-functions registry and is always parsed as `exp.Anonymous`.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs like CASE that look like functions but take no parentheses.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # Niladic functions such as CURRENT_DATE may appear without parens.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # EXISTS(SELECT ...), ANY(SELECT ...) etc. wrap a subquery directly.
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved as-is via exp.Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3176
3177    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3178        return self._parse_column_def(self._parse_id_var())
3179
3180    def _parse_user_defined_function(
3181        self, kind: t.Optional[TokenType] = None
3182    ) -> t.Optional[exp.Expression]:
3183        this = self._parse_id_var()
3184
3185        while self._match(TokenType.DOT):
3186            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3187
3188        if not self._match(TokenType.L_PAREN):
3189            return this
3190
3191        expressions = self._parse_csv(self._parse_function_parameter)
3192        self._match_r_paren()
3193        return self.expression(
3194            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3195        )
3196
3197    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3198        literal = self._parse_primary()
3199        if literal:
3200            return self.expression(exp.Introducer, this=token.text, expression=literal)
3201
3202        return self.expression(exp.Identifier, this=token.text)
3203
3204    def _parse_session_parameter(self) -> exp.Expression:
3205        kind = None
3206        this = self._parse_id_var() or self._parse_primary()
3207
3208        if this and self._match(TokenType.DOT):
3209            kind = this.name
3210            this = self._parse_var() or self._parse_primary()
3211
3212        return self.expression(exp.SessionParameter, this=this, kind=kind)
3213
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `(x, y) -> x + y`), falling back to a
        regular select/expression argument when no lambda operator follows.

        Args:
            alias: forwarded to `_parse_select_or_expression`; allows aliased
                arguments for functions that accept them.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized lambda parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator found: undo the lookahead and parse a plain argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Inside function arguments, `name = value` is a named argument,
                    # so the left-hand side is a variable rather than a column.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3245
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints) for `this`.

        Returns `this` unchanged when a nested SELECT follows (e.g. CREATE ... AS
        SELECT) or when no "(" is present at the current position.
        """
        index = self._index

        if not self.errors:
            # Probe for a nested select; this is only a lookahead, so always
            # rewind and discard any errors the probe produced.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3268
3269    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3270        # column defs are not really columns, they're identifiers
3271        if isinstance(this, exp.Column):
3272            this = this.this
3273        kind = self._parse_types()
3274
3275        if self._match_text_seq("FOR", "ORDINALITY"):
3276            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3277
3278        constraints = []
3279        while True:
3280            constraint = self._parse_column_constraint()
3281            if not constraint:
3282                break
3283            constraints.append(constraint)
3284
3285        if not kind and not constraints:
3286            return this
3287
3288        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3289
3290    def _parse_auto_increment(self) -> exp.Expression:
3291        start = None
3292        increment = None
3293
3294        if self._match(TokenType.L_PAREN, advance=False):
3295            args = self._parse_wrapped_csv(self._parse_bitwise)
3296            start = seq_get(args, 0)
3297            increment = seq_get(args, 1)
3298        elif self._match_text_seq("START"):
3299            start = self._parse_bitwise()
3300            self._match_text_seq("INCREMENT")
3301            increment = self._parse_bitwise()
3302
3303        if start and increment:
3304            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3305
3306        return exp.AutoIncrementColumnConstraint()
3307
3308    def _parse_compress(self) -> exp.Expression:
3309        if self._match(TokenType.L_PAREN, advance=False):
3310            return self.expression(
3311                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3312            )
3313
3314        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3315
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS {IDENTITY | (<expr>)}
        along with the optional parenthesized sequence options.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options may appear in any combination.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>): the parens hold a generation expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3350
3351    def _parse_inline(self) -> t.Optional[exp.Expression]:
3352        self._match_text_seq("LENGTH")
3353        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3354
3355    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3356        if self._match_text_seq("NULL"):
3357            return self.expression(exp.NotNullColumnConstraint)
3358        if self._match_text_seq("CASESPECIFIC"):
3359            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3360        return None
3361
3362    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3363        if self._match(TokenType.CONSTRAINT):
3364            this = self._parse_id_var()
3365        else:
3366            this = None
3367
3368        if self._match_texts(self.CONSTRAINT_PARSERS):
3369            return self.expression(
3370                exp.ColumnConstraint,
3371                this=this,
3372                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3373            )
3374
3375        return this
3376
3377    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3378        if not self._match(TokenType.CONSTRAINT):
3379            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3380
3381        this = self._parse_id_var()
3382        expressions = []
3383
3384        while True:
3385            constraint = self._parse_unnamed_constraint() or self._parse_function()
3386            if not constraint:
3387                break
3388            expressions.append(constraint)
3389
3390        return self.expression(exp.Constraint, this=this, expressions=expressions)
3391
3392    def _parse_unnamed_constraint(
3393        self, constraints: t.Optional[t.Collection[str]] = None
3394    ) -> t.Optional[exp.Expression]:
3395        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3396            return None
3397
3398        constraint = self._prev.text.upper()
3399        if constraint not in self.CONSTRAINT_PARSERS:
3400            self.raise_error(f"No parser found for schema constraint {constraint}.")
3401
3402        return self.CONSTRAINT_PARSERS[constraint](self)
3403
3404    def _parse_unique(self) -> exp.Expression:
3405        self._match_text_seq("KEY")
3406        return self.expression(
3407            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3408        )
3409
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>, DEFERRABLE,
        MATCH FULL, ...) as raw strings, stopping at the first unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event name (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3446
3447    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3448        if match and not self._match(TokenType.REFERENCES):
3449            return None
3450
3451        expressions = None
3452        this = self._parse_id_var()
3453
3454        if self._match(TokenType.L_PAREN, advance=False):
3455            expressions = self._parse_wrapped_id_vars()
3456
3457        options = self._parse_key_constraint_options()
3458        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3459
3460    def _parse_foreign_key(self) -> exp.Expression:
3461        expressions = self._parse_wrapped_id_vars()
3462        reference = self._parse_references()
3463        options = {}
3464
3465        while self._match(TokenType.ON):
3466            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3467                self.raise_error("Expected DELETE or UPDATE")
3468
3469            kind = self._prev.text.lower()
3470
3471            if self._match_text_seq("NO", "ACTION"):
3472                action = "NO ACTION"
3473            elif self._match(TokenType.SET):
3474                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3475                action = "SET " + self._prev.text.upper()
3476            else:
3477                self._advance()
3478                action = self._prev.text.upper()
3479
3480            options[kind] = action
3481
3482        return self.expression(
3483            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3484        )
3485
3486    def _parse_primary_key(self) -> exp.Expression:
3487        desc = (
3488            self._match_set((TokenType.ASC, TokenType.DESC))
3489            and self._prev.token_type == TokenType.DESC
3490        )
3491
3492        if not self._match(TokenType.L_PAREN, advance=False):
3493            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3494
3495        expressions = self._parse_wrapped_csv(self._parse_field)
3496        options = self._parse_key_constraint_options()
3497        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3498
    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse a bracketed suffix: `[...]` (index/array) or `{...}` (struct).

        Recurses at the end to support chained accesses such as x[0][1].
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize indices across dialects with different index offsets.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3535
3536    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3537        if self._match(TokenType.COLON):
3538            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3539        return this
3540
3541    def _parse_case(self) -> t.Optional[exp.Expression]:
3542        ifs = []
3543        default = None
3544
3545        expression = self._parse_conjunction()
3546
3547        while self._match(TokenType.WHEN):
3548            this = self._parse_conjunction()
3549            self._match(TokenType.THEN)
3550            then = self._parse_conjunction()
3551            ifs.append(self.expression(exp.If, this=this, true=then))
3552
3553        if self._match(TokenType.ELSE):
3554            default = self._parse_conjunction()
3555
3556        if not self._match(TokenType.END):
3557            self.raise_error("Expected END after CASE", self._prev)
3558
3559        return self._parse_window(
3560            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3561        )
3562
3563    def _parse_if(self) -> t.Optional[exp.Expression]:
3564        if self._match(TokenType.L_PAREN):
3565            args = self._parse_csv(self._parse_conjunction)
3566            this = exp.If.from_arg_list(args)
3567            self.validate_expression(this, args)
3568            self._match_r_paren()
3569        else:
3570            index = self._index - 1
3571            condition = self._parse_conjunction()
3572
3573            if not condition:
3574                self._retreat(index)
3575                return None
3576
3577            self._match(TokenType.THEN)
3578            true = self._parse_conjunction()
3579            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3580            self._match(TokenType.END)
3581            this = self.expression(exp.If, this=condition, true=true, false=false)
3582
3583        return self._parse_window(this)
3584
3585    def _parse_extract(self) -> exp.Expression:
3586        this = self._parse_function() or self._parse_var() or self._parse_type()
3587
3588        if self._match(TokenType.FROM):
3589            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3590
3591        if not self._match(TokenType.COMMA):
3592            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3593
3594        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3595
3596    def _parse_cast(self, strict: bool) -> exp.Expression:
3597        this = self._parse_conjunction()
3598
3599        if not self._match(TokenType.ALIAS):
3600            if self._match(TokenType.COMMA):
3601                return self.expression(
3602                    exp.CastToStrType, this=this, expression=self._parse_string()
3603                )
3604            else:
3605                self.raise_error("Expected AS after CAST")
3606
3607        to = self._parse_types()
3608
3609        if not to:
3610            self.raise_error("Expected TYPE after CAST")
3611        elif to.this == exp.DataType.Type.CHAR:
3612            if self._match(TokenType.CHARACTER_SET):
3613                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3614
3615        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3616
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, normalizing the various
        dialect forms (DISTINCT, trailing ORDER BY, WITHIN GROUP) into an
        `exp.GroupConcat` node.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: rewind past the ")" we consumed and build the plain form.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3645
3646    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3647        to: t.Optional[exp.Expression]
3648        this = self._parse_bitwise()
3649
3650        if self._match(TokenType.USING):
3651            to = self.expression(exp.CharacterSet, this=self._parse_var())
3652        elif self._match(TokenType.COMMA):
3653            to = self._parse_bitwise()
3654        else:
3655            to = None
3656
3657        # Swap the argument order if needed to produce the correct AST
3658        if self.CONVERT_TYPE_FIRST:
3659            this, to = to, this
3660
3661        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3662
3663    def _parse_decode(self) -> t.Optional[exp.Expression]:
3664        """
3665        There are generally two variants of the DECODE function:
3666
3667        - DECODE(bin, charset)
3668        - DECODE(expression, search, result [, search, result] ... [, default])
3669
3670        The second variant will always be parsed into a CASE expression. Note that NULL
3671        needs special treatment, since we need to explicitly check for it with `IS NULL`,
3672        instead of relying on pattern matching.
3673        """
3674        args = self._parse_csv(self._parse_conjunction)
3675
3676        if len(args) < 3:
3677            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))
3678
3679        expression, *expressions = args
3680        if not expression:
3681            return None
3682
3683        ifs = []
3684        for search, result in zip(expressions[::2], expressions[1::2]):
3685            if not search or not result:
3686                return None
3687
3688            if isinstance(search, exp.Literal):
3689                ifs.append(
3690                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
3691                )
3692            elif isinstance(search, exp.Null):
3693                ifs.append(
3694                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
3695                )
3696            else:
3697                cond = exp.or_(
3698                    exp.EQ(this=expression.copy(), expression=search),
3699                    exp.and_(
3700                        exp.Is(this=expression.copy(), expression=exp.Null()),
3701                        exp.Is(this=search.copy(), expression=exp.Null()),
3702                        copy=False,
3703                    ),
3704                    copy=False,
3705                )
3706                ifs.append(exp.If(this=cond, true=result))
3707
3708        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3709
3710    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3711        self._match_text_seq("KEY")
3712        key = self._parse_field()
3713        self._match(TokenType.COLON)
3714        self._match_text_seq("VALUE")
3715        value = self._parse_field()
3716        if not key and not value:
3717            return None
3718        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3719
3720    def _parse_json_object(self) -> exp.Expression:
3721        expressions = self._parse_csv(self._parse_json_key_value)
3722
3723        null_handling = None
3724        if self._match_text_seq("NULL", "ON", "NULL"):
3725            null_handling = "NULL ON NULL"
3726        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3727            null_handling = "ABSENT ON NULL"
3728
3729        unique_keys = None
3730        if self._match_text_seq("WITH", "UNIQUE"):
3731            unique_keys = True
3732        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3733            unique_keys = False
3734
3735        self._match_text_seq("KEYS")
3736
3737        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3738        format_json = self._match_text_seq("FORMAT", "JSON")
3739        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3740
3741        return self.expression(
3742            exp.JSONObject,
3743            expressions=expressions,
3744            null_handling=null_handling,
3745            unique_keys=unique_keys,
3746            return_type=return_type,
3747            format_json=format_json,
3748            encoding=encoding,
3749        )
3750
3751    def _parse_logarithm(self) -> exp.Expression:
3752        # Default argument order is base, expression
3753        args = self._parse_csv(self._parse_range)
3754
3755        if len(args) > 1:
3756            if not self.LOG_BASE_FIRST:
3757                args.reverse()
3758            return exp.Log.from_arg_list(args)
3759
3760        return self.expression(
3761            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3762        )
3763
3764    def _parse_match_against(self) -> exp.Expression:
3765        expressions = self._parse_csv(self._parse_column)
3766
3767        self._match_text_seq(")", "AGAINST", "(")
3768
3769        this = self._parse_string()
3770
3771        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3772            modifier = "IN NATURAL LANGUAGE MODE"
3773            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3774                modifier = f"{modifier} WITH QUERY EXPANSION"
3775        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3776            modifier = "IN BOOLEAN MODE"
3777        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3778            modifier = "WITH QUERY EXPANSION"
3779        else:
3780            modifier = None
3781
3782        return self.expression(
3783            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3784        )
3785
3786    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3787    def _parse_open_json(self) -> exp.Expression:
3788        this = self._parse_bitwise()
3789        path = self._match(TokenType.COMMA) and self._parse_string()
3790
3791        def _parse_open_json_column_def() -> exp.Expression:
3792            this = self._parse_field(any_token=True)
3793            kind = self._parse_types()
3794            path = self._parse_string()
3795            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3796            return self.expression(
3797                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3798            )
3799
3800        expressions = None
3801        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3802            self._match_l_paren()
3803            expressions = self._parse_csv(_parse_open_json_column_def)
3804
3805        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3806
3807    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3808        args = self._parse_csv(self._parse_bitwise)
3809
3810        if self._match(TokenType.IN):
3811            return self.expression(
3812                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3813            )
3814
3815        if haystack_first:
3816            haystack = seq_get(args, 0)
3817            needle = seq_get(args, 1)
3818        else:
3819            needle = seq_get(args, 0)
3820            haystack = seq_get(args, 1)
3821
3822        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3823
3824        self.validate_expression(this, args)
3825
3826        return this
3827
3828    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3829        args = self._parse_csv(self._parse_table)
3830        return exp.JoinHint(this=func_name.upper(), expressions=args)
3831
3832    def _parse_substring(self) -> exp.Expression:
3833        # Postgres supports the form: substring(string [from int] [for int])
3834        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3835
3836        args = self._parse_csv(self._parse_bitwise)
3837
3838        if self._match(TokenType.FROM):
3839            args.append(self._parse_bitwise())
3840            if self._match(TokenType.FOR):
3841                args.append(self._parse_bitwise())
3842
3843        this = exp.Substring.from_arg_list(args)
3844        self.validate_expression(this, args)
3845
3846        return this
3847
3848    def _parse_trim(self) -> exp.Expression:
3849        # https://www.w3resource.com/sql/character-functions/trim.php
3850        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3851
3852        position = None
3853        collation = None
3854
3855        if self._match_texts(self.TRIM_TYPES):
3856            position = self._prev.text.upper()
3857
3858        expression = self._parse_bitwise()
3859        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3860            this = self._parse_bitwise()
3861        else:
3862            this = expression
3863            expression = None
3864
3865        if self._match(TokenType.COLLATE):
3866            collation = self._parse_bitwise()
3867
3868        return self.expression(
3869            exp.Trim,
3870            this=this,
3871            position=position,
3872            expression=expression,
3873            collation=collation,
3874        )
3875
3876    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3877        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3878
3879    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3880        return self._parse_window(self._parse_id_var(), alias=True)
3881
3882    def _parse_respect_or_ignore_nulls(
3883        self, this: t.Optional[exp.Expression]
3884    ) -> t.Optional[exp.Expression]:
3885        if self._match_text_seq("IGNORE", "NULLS"):
3886            return self.expression(exp.IgnoreNulls, this=this)
3887        if self._match_text_seq("RESPECT", "NULLS"):
3888            return self.expression(exp.RespectNulls, this=this)
3889        return this
3890
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-function syntax that may trail a function call.

        Handles, in order: ``FILTER (WHERE ...)``, ``WITHIN GROUP (ORDER BY ...)``,
        trailing ``IGNORE|RESPECT NULLS``, and finally either a named-window
        definition (when ``alias`` is True) or an ``OVER ...`` clause. Returns
        `this` unchanged when no window syntax follows.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent) keyword: there is no window clause.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # e.g. OVER named_window -- a bare reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        # Inside the parens, an optional base-window name may precede the spec.
        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # A ROWS/RANGE keyword introduces an explicit frame specification.
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3973
3974    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3975        self._match(TokenType.BETWEEN)
3976
3977        return {
3978            "value": (
3979                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3980                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
3981                or self._parse_bitwise()
3982            ),
3983            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
3984        }
3985
3986    def _parse_alias(
3987        self, this: t.Optional[exp.Expression], explicit: bool = False
3988    ) -> t.Optional[exp.Expression]:
3989        any_token = self._match(TokenType.ALIAS)
3990
3991        if explicit and not any_token:
3992            return this
3993
3994        if self._match(TokenType.L_PAREN):
3995            aliases = self.expression(
3996                exp.Aliases,
3997                this=this,
3998                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3999            )
4000            self._match_r_paren(aliases)
4001            return aliases
4002
4003        alias = self._parse_id_var(any_token)
4004
4005        if alias:
4006            return self.expression(exp.Alias, this=this, alias=alias)
4007
4008        return this
4009
4010    def _parse_id_var(
4011        self,
4012        any_token: bool = True,
4013        tokens: t.Optional[t.Collection[TokenType]] = None,
4014        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
4015    ) -> t.Optional[exp.Expression]:
4016        identifier = self._parse_identifier()
4017
4018        if identifier:
4019            return identifier
4020
4021        prefix = ""
4022
4023        if prefix_tokens:
4024            while self._match_set(prefix_tokens):
4025                prefix += self._prev.text
4026
4027        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4028            quoted = self._prev.token_type == TokenType.STRING
4029            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
4030
4031        return None
4032
4033    def _parse_string(self) -> t.Optional[exp.Expression]:
4034        if self._match(TokenType.STRING):
4035            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4036        return self._parse_placeholder()
4037
4038    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4039        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4040
4041    def _parse_number(self) -> t.Optional[exp.Expression]:
4042        if self._match(TokenType.NUMBER):
4043            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4044        return self._parse_placeholder()
4045
4046    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4047        if self._match(TokenType.IDENTIFIER):
4048            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4049        return self._parse_placeholder()
4050
4051    def _parse_var(
4052        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4053    ) -> t.Optional[exp.Expression]:
4054        if (
4055            (any_token and self._advance_any())
4056            or self._match(TokenType.VAR)
4057            or (self._match_set(tokens) if tokens else False)
4058        ):
4059            return self.expression(exp.Var, this=self._prev.text)
4060        return self._parse_placeholder()
4061
4062    def _advance_any(self) -> t.Optional[Token]:
4063        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4064            self._advance()
4065            return self._prev
4066        return None
4067
4068    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4069        return self._parse_var() or self._parse_string()
4070
4071    def _parse_null(self) -> t.Optional[exp.Expression]:
4072        if self._match(TokenType.NULL):
4073            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4074        return None
4075
4076    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4077        if self._match(TokenType.TRUE):
4078            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4079        if self._match(TokenType.FALSE):
4080            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4081        return None
4082
4083    def _parse_star(self) -> t.Optional[exp.Expression]:
4084        if self._match(TokenType.STAR):
4085            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4086        return None
4087
4088    def _parse_parameter(self) -> exp.Expression:
4089        wrapped = self._match(TokenType.L_BRACE)
4090        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4091        self._match(TokenType.R_BRACE)
4092        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4093
4094    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4095        if self._match_set(self.PLACEHOLDER_PARSERS):
4096            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4097            if placeholder:
4098                return placeholder
4099            self._advance(-1)
4100        return None
4101
4102    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4103        if not self._match(TokenType.EXCEPT):
4104            return None
4105        if self._match(TokenType.L_PAREN, advance=False):
4106            return self._parse_wrapped_csv(self._parse_column)
4107        return self._parse_csv(self._parse_column)
4108
4109    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4110        if not self._match(TokenType.REPLACE):
4111            return None
4112        if self._match(TokenType.L_PAREN, advance=False):
4113            return self._parse_wrapped_csv(self._parse_expression)
4114        return self._parse_csv(self._parse_expression)
4115
4116    def _parse_csv(
4117        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4118    ) -> t.List[t.Optional[exp.Expression]]:
4119        parse_result = parse_method()
4120        items = [parse_result] if parse_result is not None else []
4121
4122        while self._match(sep):
4123            self._add_comments(parse_result)
4124            parse_result = parse_method()
4125            if parse_result is not None:
4126                items.append(parse_result)
4127
4128        return items
4129
4130    def _parse_tokens(
4131        self, parse_method: t.Callable, expressions: t.Dict
4132    ) -> t.Optional[exp.Expression]:
4133        this = parse_method()
4134
4135        while self._match_set(expressions):
4136            this = self.expression(
4137                expressions[self._prev.token_type],
4138                this=this,
4139                comments=self._prev_comments,
4140                expression=parse_method(),
4141            )
4142
4143        return this
4144
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list; parens optional if `optional`."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4147
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list wrapped in parens; parens optional if `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
4154
4155    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4156        wrapped = self._match(TokenType.L_PAREN)
4157        if not wrapped and not optional:
4158            self.raise_error("Expecting (")
4159        parse_result = parse_method()
4160        if wrapped:
4161            self._match_r_paren()
4162        return parse_result
4163
4164    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4165        return self._parse_select() or self._parse_set_operations(
4166            self._parse_expression() if alias else self._parse_conjunction()
4167        )
4168
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS SELECT)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )
4173
4174    def _parse_transaction(self) -> exp.Expression:
4175        this = None
4176        if self._match_texts(self.TRANSACTION_KIND):
4177            this = self._prev.text
4178
4179        self._match_texts({"TRANSACTION", "WORK"})
4180
4181        modes = []
4182        while True:
4183            mode = []
4184            while self._match(TokenType.VAR):
4185                mode.append(self._prev.text)
4186
4187            if mode:
4188                modes.append(" ".join(mode))
4189            if not self._match(TokenType.COMMA):
4190                break
4191
4192        return self.expression(exp.Transaction, this=this, modes=modes)
4193
4194    def _parse_commit_or_rollback(self) -> exp.Expression:
4195        chain = None
4196        savepoint = None
4197        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4198
4199        self._match_texts({"TRANSACTION", "WORK"})
4200
4201        if self._match_text_seq("TO"):
4202            self._match_text_seq("SAVEPOINT")
4203            savepoint = self._parse_id_var()
4204
4205        if self._match(TokenType.AND):
4206            chain = not self._match_text_seq("NO")
4207            self._match_text_seq("CHAIN")
4208
4209        if is_rollback:
4210            return self.expression(exp.Rollback, savepoint=savepoint)
4211        return self.expression(exp.Commit, chain=chain)
4212
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse one ``ADD [COLUMN] [IF NOT EXISTS] <column def>`` action.

        Also records a FIRST/AFTER column position when present (Databricks /
        MySQL style).
        """
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4233
4234    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4235        drop = self._match(TokenType.DROP) and self._parse_drop()
4236        if drop and not isinstance(drop, exp.Command):
4237            drop.set("kind", drop.args.get("kind", "COLUMN"))
4238        return drop
4239
4240    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4241    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4242        return self.expression(
4243            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4244        )
4245
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action.

        ``self._prev`` is the token that triggered this parser. A named
        CONSTRAINT with a CHECK body returns immediately; otherwise control
        falls through to the FOREIGN KEY / PRIMARY KEY branches below.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # Either the trigger token itself was FOREIGN/PRIMARY KEY, or a named
        # CONSTRAINT is followed by one of those keywords.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4269
4270    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4271        index = self._index - 1
4272
4273        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4274            return self._parse_csv(self._parse_add_constraint)
4275
4276        self._retreat(index)
4277        return self._parse_csv(self._parse_add_column)
4278
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action.

        Handles DROP DEFAULT, SET DEFAULT, and the type-change form
        ``[SET DATA] TYPE <type> [COLLATE ...] [USING ...]``. Note that the
        keyword arguments below consume tokens in order.
        """
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4296
4297    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4298        index = self._index - 1
4299
4300        partition_exists = self._parse_exists()
4301        if self._match(TokenType.PARTITION, advance=False):
4302            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4303
4304        self._retreat(index)
4305        return self._parse_csv(self._parse_drop_column)
4306
4307    def _parse_alter_table_rename(self) -> exp.Expression:
4308        self._match_text_seq("TO")
4309        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4310
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Dispatches on the action keyword via ALTER_PARSERS. Anything that is
        not ALTER TABLE, has no registered action parser, or leaves unparsed
        tokens behind is captured verbatim as an exp.Command.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if the action parser consumed
            # every remaining token; otherwise fall through to the command path.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4335
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... statement.

        Each WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND ...] THEN clause is
        collected into an exp.When node whose `then` holds the INSERT, UPDATE,
        or DELETE action.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or when omitted.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (...)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET col = expr, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4401
4402    def _parse_show(self) -> t.Optional[exp.Expression]:
4403        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4404        if parser:
4405            return parser(self)
4406        self._advance()
4407        return self.expression(exp.Show, this=self._prev.text.upper())
4408
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one ``<name> = <value>`` (or ``<name> TO <value>``) SET item.

        GLOBAL/SESSION TRANSACTION is delegated to the transaction parser.
        Returns None (with the cursor rewound) when no assignment operator
        follows the left-hand side.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4435
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse ``SET [GLOBAL|SESSION] TRANSACTION <characteristics>``."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via **kwargs.
            **{"global": global_},  # type: ignore
        )
4447
4448    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4449        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4450        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4451
4452    def _parse_set(self) -> exp.Expression:
4453        index = self._index
4454        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4455
4456        if self._curr:
4457            self._retreat(index)
4458            return self._parse_as_command(self._prev)
4459
4460        return set_
4461
4462    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4463        for option in options:
4464            if self._match_text_seq(*option.split(" ")):
4465                return exp.Var(this=option)
4466        return None
4467
4468    def _parse_as_command(self, start: Token) -> exp.Command:
4469        while self._curr:
4470            self._advance()
4471        text = self._find_sql(start, self._prev)
4472        size = len(start.text)
4473        return exp.Command(this=text[:size], expression=text[size:])
4474
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the longest keyword sequence at the cursor.

        Walks `trie` with successive token texts; result 2 means a full key
        matched, result 0 means no key can match. The cursor is rewound when
        nothing matches, and is left after the matched keywords otherwise.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4497
4498    def _match(self, token_type, advance=True, expression=None):
4499        if not self._curr:
4500            return None
4501
4502        if self._curr.token_type == token_type:
4503            if advance:
4504                self._advance()
4505            self._add_comments(expression)
4506            return True
4507
4508        return None
4509
4510    def _match_set(self, types, advance=True):
4511        if not self._curr:
4512            return None
4513
4514        if self._curr.token_type in types:
4515            if advance:
4516                self._advance()
4517            return True
4518
4519        return None
4520
4521    def _match_pair(self, token_type_a, token_type_b, advance=True):
4522        if not self._curr or not self._next:
4523            return None
4524
4525        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4526            if advance:
4527                self._advance(2)
4528            return True
4529
4530        return None
4531
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ``(``, raising a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
4535
    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Consume a required ``)``, raising a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
4539
4540    def _match_texts(self, texts, advance=True):
4541        if self._curr and self._curr.text.upper() in texts:
4542            if advance:
4543                self._advance()
4544            return True
4545        return False
4546
4547    def _match_text_seq(self, *texts, advance=True):
4548        index = self._index
4549        for text in texts:
4550            if self._curr and self._curr.text.upper() == text:
4551                self._advance()
4552            else:
4553                self._retreat(index)
4554                return False
4555
4556        if not advance:
4557            self._retreat(index)
4558
4559        return True
4560
    # Overloads: the implementation preserves None-ness of its argument.
    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        A column with a table qualifier becomes ``Dot(table, name)``; an
        unqualified column or bare identifier becomes a Var.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this
4586
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters inside a lambda body.

        Columns whose first part names a variable in `lambda_variables` are
        rewritten to a Dot chain (qualified) or a bare identifier, splicing the
        replacement at the top of any enclosing Dot chain.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot so the whole chain is replaced at once;
                # the while/else runs only when the column is not inside a Dot.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: List) -> sqlglot.expressions.StarMap | sqlglot.expressions.VarMap:
20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
21    if len(args) == 1 and args[0].is_star:
22        return exp.StarMap(this=args[0])
23
24    keys = []
25    values = []
26    for i in range(0, len(args), 2):
27        keys.append(args[i])
28        values.append(args[i + 1])
29    return exp.VarMap(
30        keys=exp.Array(expressions=keys),
31        values=exp.Array(expressions=values),
32    )
def parse_like(args: List) -> sqlglot.expressions.Expression:
35def parse_like(args: t.List) -> exp.Expression:
36    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
37    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
40def binary_range_parser(
41    expr_type: t.Type[exp.Expression],
42) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
43    return lambda self, this: self._parse_escape(
44        self.expression(expr_type, this=this, expression=self._parse_bitwise())
45    )
class Parser:
  57class Parser(metaclass=_Parser):
  58    """
  59    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  60    a parsed syntax tree.
  61
  62    Args:
  63        error_level: the desired error level.
  64            Default: ErrorLevel.IMMEDIATE
  65        error_message_context: determines the amount of context to capture from a
  66            query string when displaying the error message (in number of characters).
  67            Default: 50.
  68        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  69            Default: 0
  70        alias_post_tablesample: If the table alias comes after tablesample.
  71            Default: False
  72        max_errors: Maximum number of error messages to include in a raised ParseError.
  73            This is only relevant if error_level is ErrorLevel.RAISE.
  74            Default: 3
  75        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  76            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  77            Default: "nulls_are_small"
  78    """
  79
  80    FUNCTIONS: t.Dict[str, t.Callable] = {
  81        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  82        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  83            this=seq_get(args, 0),
  84            to=exp.DataType(this=exp.DataType.Type.TEXT),
  85        ),
  86        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  87        "IFNULL": exp.Coalesce.from_arg_list,
  88        "LIKE": parse_like,
  89        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  90            this=seq_get(args, 0),
  91            to=exp.DataType(this=exp.DataType.Type.TEXT),
  92        ),
  93        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  94            this=exp.Cast(
  95                this=seq_get(args, 0),
  96                to=exp.DataType(this=exp.DataType.Type.TEXT),
  97            ),
  98            start=exp.Literal.number(1),
  99            length=exp.Literal.number(10),
 100        ),
 101        "VAR_MAP": parse_var_map,
 102    }
 103
 104    NO_PAREN_FUNCTIONS = {
 105        TokenType.CURRENT_DATE: exp.CurrentDate,
 106        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 107        TokenType.CURRENT_TIME: exp.CurrentTime,
 108        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 109        TokenType.CURRENT_USER: exp.CurrentUser,
 110    }
 111
 112    JOIN_HINTS: t.Set[str] = set()
 113
 114    NESTED_TYPE_TOKENS = {
 115        TokenType.ARRAY,
 116        TokenType.MAP,
 117        TokenType.NULLABLE,
 118        TokenType.STRUCT,
 119    }
 120
 121    TYPE_TOKENS = {
 122        TokenType.BIT,
 123        TokenType.BOOLEAN,
 124        TokenType.TINYINT,
 125        TokenType.UTINYINT,
 126        TokenType.SMALLINT,
 127        TokenType.USMALLINT,
 128        TokenType.INT,
 129        TokenType.UINT,
 130        TokenType.BIGINT,
 131        TokenType.UBIGINT,
 132        TokenType.INT128,
 133        TokenType.UINT128,
 134        TokenType.INT256,
 135        TokenType.UINT256,
 136        TokenType.FLOAT,
 137        TokenType.DOUBLE,
 138        TokenType.CHAR,
 139        TokenType.NCHAR,
 140        TokenType.VARCHAR,
 141        TokenType.NVARCHAR,
 142        TokenType.TEXT,
 143        TokenType.MEDIUMTEXT,
 144        TokenType.LONGTEXT,
 145        TokenType.MEDIUMBLOB,
 146        TokenType.LONGBLOB,
 147        TokenType.BINARY,
 148        TokenType.VARBINARY,
 149        TokenType.JSON,
 150        TokenType.JSONB,
 151        TokenType.INTERVAL,
 152        TokenType.TIME,
 153        TokenType.TIMESTAMP,
 154        TokenType.TIMESTAMPTZ,
 155        TokenType.TIMESTAMPLTZ,
 156        TokenType.DATETIME,
 157        TokenType.DATETIME64,
 158        TokenType.DATE,
 159        TokenType.DECIMAL,
 160        TokenType.BIGDECIMAL,
 161        TokenType.UUID,
 162        TokenType.GEOGRAPHY,
 163        TokenType.GEOMETRY,
 164        TokenType.HLLSKETCH,
 165        TokenType.HSTORE,
 166        TokenType.PSEUDO_TYPE,
 167        TokenType.SUPER,
 168        TokenType.SERIAL,
 169        TokenType.SMALLSERIAL,
 170        TokenType.BIGSERIAL,
 171        TokenType.XML,
 172        TokenType.UNIQUEIDENTIFIER,
 173        TokenType.MONEY,
 174        TokenType.SMALLMONEY,
 175        TokenType.ROWVERSION,
 176        TokenType.IMAGE,
 177        TokenType.VARIANT,
 178        TokenType.OBJECT,
 179        TokenType.INET,
 180        *NESTED_TYPE_TOKENS,
 181    }
 182
 183    SUBQUERY_PREDICATES = {
 184        TokenType.ANY: exp.Any,
 185        TokenType.ALL: exp.All,
 186        TokenType.EXISTS: exp.Exists,
 187        TokenType.SOME: exp.Any,
 188    }
 189
 190    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 191
 192    DB_CREATABLES = {
 193        TokenType.DATABASE,
 194        TokenType.SCHEMA,
 195        TokenType.TABLE,
 196        TokenType.VIEW,
 197    }
 198
 199    CREATABLES = {
 200        TokenType.COLUMN,
 201        TokenType.FUNCTION,
 202        TokenType.INDEX,
 203        TokenType.PROCEDURE,
 204        *DB_CREATABLES,
 205    }
 206
 207    ID_VAR_TOKENS = {
 208        TokenType.VAR,
 209        TokenType.ANTI,
 210        TokenType.APPLY,
 211        TokenType.ASC,
 212        TokenType.AUTO_INCREMENT,
 213        TokenType.BEGIN,
 214        TokenType.CACHE,
 215        TokenType.COLLATE,
 216        TokenType.COMMAND,
 217        TokenType.COMMENT,
 218        TokenType.COMMIT,
 219        TokenType.CONSTRAINT,
 220        TokenType.DEFAULT,
 221        TokenType.DELETE,
 222        TokenType.DESC,
 223        TokenType.DESCRIBE,
 224        TokenType.DIV,
 225        TokenType.END,
 226        TokenType.EXECUTE,
 227        TokenType.ESCAPE,
 228        TokenType.FALSE,
 229        TokenType.FIRST,
 230        TokenType.FILTER,
 231        TokenType.FORMAT,
 232        TokenType.FULL,
 233        TokenType.IF,
 234        TokenType.IS,
 235        TokenType.ISNULL,
 236        TokenType.INTERVAL,
 237        TokenType.KEEP,
 238        TokenType.LEFT,
 239        TokenType.LOAD,
 240        TokenType.MERGE,
 241        TokenType.NATURAL,
 242        TokenType.NEXT,
 243        TokenType.OFFSET,
 244        TokenType.ORDINALITY,
 245        TokenType.OVERWRITE,
 246        TokenType.PARTITION,
 247        TokenType.PERCENT,
 248        TokenType.PIVOT,
 249        TokenType.PRAGMA,
 250        TokenType.RANGE,
 251        TokenType.REFERENCES,
 252        TokenType.RIGHT,
 253        TokenType.ROW,
 254        TokenType.ROWS,
 255        TokenType.SEMI,
 256        TokenType.SET,
 257        TokenType.SETTINGS,
 258        TokenType.SHOW,
 259        TokenType.TEMPORARY,
 260        TokenType.TOP,
 261        TokenType.TRUE,
 262        TokenType.UNIQUE,
 263        TokenType.UNPIVOT,
 264        TokenType.VOLATILE,
 265        TokenType.WINDOW,
 266        *CREATABLES,
 267        *SUBQUERY_PREDICATES,
 268        *TYPE_TOKENS,
 269        *NO_PAREN_FUNCTIONS,
 270    }
 271
 272    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 273
 274    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 275        TokenType.APPLY,
 276        TokenType.FULL,
 277        TokenType.LEFT,
 278        TokenType.LOCK,
 279        TokenType.NATURAL,
 280        TokenType.OFFSET,
 281        TokenType.RIGHT,
 282        TokenType.WINDOW,
 283    }
 284
 285    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 286
 287    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 288
 289    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 290
 291    FUNC_TOKENS = {
 292        TokenType.COMMAND,
 293        TokenType.CURRENT_DATE,
 294        TokenType.CURRENT_DATETIME,
 295        TokenType.CURRENT_TIMESTAMP,
 296        TokenType.CURRENT_TIME,
 297        TokenType.CURRENT_USER,
 298        TokenType.FILTER,
 299        TokenType.FIRST,
 300        TokenType.FORMAT,
 301        TokenType.GLOB,
 302        TokenType.IDENTIFIER,
 303        TokenType.INDEX,
 304        TokenType.ISNULL,
 305        TokenType.ILIKE,
 306        TokenType.LIKE,
 307        TokenType.MERGE,
 308        TokenType.OFFSET,
 309        TokenType.PRIMARY_KEY,
 310        TokenType.RANGE,
 311        TokenType.REPLACE,
 312        TokenType.ROW,
 313        TokenType.UNNEST,
 314        TokenType.VAR,
 315        TokenType.LEFT,
 316        TokenType.RIGHT,
 317        TokenType.DATE,
 318        TokenType.DATETIME,
 319        TokenType.TABLE,
 320        TokenType.TIMESTAMP,
 321        TokenType.TIMESTAMPTZ,
 322        TokenType.WINDOW,
 323        *TYPE_TOKENS,
 324        *SUBQUERY_PREDICATES,
 325    }
 326
 327    CONJUNCTION = {
 328        TokenType.AND: exp.And,
 329        TokenType.OR: exp.Or,
 330    }
 331
 332    EQUALITY = {
 333        TokenType.EQ: exp.EQ,
 334        TokenType.NEQ: exp.NEQ,
 335        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 336    }
 337
 338    COMPARISON = {
 339        TokenType.GT: exp.GT,
 340        TokenType.GTE: exp.GTE,
 341        TokenType.LT: exp.LT,
 342        TokenType.LTE: exp.LTE,
 343    }
 344
 345    BITWISE = {
 346        TokenType.AMP: exp.BitwiseAnd,
 347        TokenType.CARET: exp.BitwiseXor,
 348        TokenType.PIPE: exp.BitwiseOr,
 349        TokenType.DPIPE: exp.DPipe,
 350    }
 351
 352    TERM = {
 353        TokenType.DASH: exp.Sub,
 354        TokenType.PLUS: exp.Add,
 355        TokenType.MOD: exp.Mod,
 356        TokenType.COLLATE: exp.Collate,
 357    }
 358
 359    FACTOR = {
 360        TokenType.DIV: exp.IntDiv,
 361        TokenType.LR_ARROW: exp.Distance,
 362        TokenType.SLASH: exp.Div,
 363        TokenType.STAR: exp.Mul,
 364    }
 365
 366    TIMESTAMPS = {
 367        TokenType.TIME,
 368        TokenType.TIMESTAMP,
 369        TokenType.TIMESTAMPTZ,
 370        TokenType.TIMESTAMPLTZ,
 371    }
 372
 373    SET_OPERATIONS = {
 374        TokenType.UNION,
 375        TokenType.INTERSECT,
 376        TokenType.EXCEPT,
 377    }
 378
 379    JOIN_SIDES = {
 380        TokenType.LEFT,
 381        TokenType.RIGHT,
 382        TokenType.FULL,
 383    }
 384
 385    JOIN_KINDS = {
 386        TokenType.INNER,
 387        TokenType.OUTER,
 388        TokenType.CROSS,
 389        TokenType.SEMI,
 390        TokenType.ANTI,
 391    }
 392
 393    LAMBDAS = {
 394        TokenType.ARROW: lambda self, expressions: self.expression(
 395            exp.Lambda,
 396            this=self._replace_lambda(
 397                self._parse_conjunction(),
 398                {node.name for node in expressions},
 399            ),
 400            expressions=expressions,
 401        ),
 402        TokenType.FARROW: lambda self, expressions: self.expression(
 403            exp.Kwarg,
 404            this=exp.Var(this=expressions[0].name),
 405            expression=self._parse_conjunction(),
 406        ),
 407    }
 408
 409    COLUMN_OPERATORS = {
 410        TokenType.DOT: None,
 411        TokenType.DCOLON: lambda self, this, to: self.expression(
 412            exp.Cast if self.STRICT_CAST else exp.TryCast,
 413            this=this,
 414            to=to,
 415        ),
 416        TokenType.ARROW: lambda self, this, path: self.expression(
 417            exp.JSONExtract,
 418            this=this,
 419            expression=path,
 420        ),
 421        TokenType.DARROW: lambda self, this, path: self.expression(
 422            exp.JSONExtractScalar,
 423            this=this,
 424            expression=path,
 425        ),
 426        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 427            exp.JSONBExtract,
 428            this=this,
 429            expression=path,
 430        ),
 431        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 432            exp.JSONBExtractScalar,
 433            this=this,
 434            expression=path,
 435        ),
 436        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 437            exp.JSONBContains,
 438            this=this,
 439            expression=key,
 440        ),
 441    }
 442
 443    EXPRESSION_PARSERS = {
 444        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 445        exp.Column: lambda self: self._parse_column(),
 446        exp.Condition: lambda self: self._parse_conjunction(),
 447        exp.DataType: lambda self: self._parse_types(),
 448        exp.Expression: lambda self: self._parse_statement(),
 449        exp.From: lambda self: self._parse_from(),
 450        exp.Group: lambda self: self._parse_group(),
 451        exp.Having: lambda self: self._parse_having(),
 452        exp.Identifier: lambda self: self._parse_id_var(),
 453        exp.Join: lambda self: self._parse_join(),
 454        exp.Lambda: lambda self: self._parse_lambda(),
 455        exp.Lateral: lambda self: self._parse_lateral(),
 456        exp.Limit: lambda self: self._parse_limit(),
 457        exp.Offset: lambda self: self._parse_offset(),
 458        exp.Order: lambda self: self._parse_order(),
 459        exp.Ordered: lambda self: self._parse_ordered(),
 460        exp.Properties: lambda self: self._parse_properties(),
 461        exp.Qualify: lambda self: self._parse_qualify(),
 462        exp.Returning: lambda self: self._parse_returning(),
 463        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 464        exp.Table: lambda self: self._parse_table_parts(),
 465        exp.TableAlias: lambda self: self._parse_table_alias(),
 466        exp.Where: lambda self: self._parse_where(),
 467        exp.Window: lambda self: self._parse_named_window(),
 468        exp.With: lambda self: self._parse_with(),
 469        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 470    }
 471
 472    STATEMENT_PARSERS = {
 473        TokenType.ALTER: lambda self: self._parse_alter(),
 474        TokenType.BEGIN: lambda self: self._parse_transaction(),
 475        TokenType.CACHE: lambda self: self._parse_cache(),
 476        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 477        TokenType.COMMENT: lambda self: self._parse_comment(),
 478        TokenType.CREATE: lambda self: self._parse_create(),
 479        TokenType.DELETE: lambda self: self._parse_delete(),
 480        TokenType.DESC: lambda self: self._parse_describe(),
 481        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 482        TokenType.DROP: lambda self: self._parse_drop(),
 483        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 484        TokenType.FROM: lambda self: exp.select("*").from_(
 485            t.cast(exp.From, self._parse_from(skip_from_token=True))
 486        ),
 487        TokenType.INSERT: lambda self: self._parse_insert(),
 488        TokenType.LOAD: lambda self: self._parse_load(),
 489        TokenType.MERGE: lambda self: self._parse_merge(),
 490        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 491        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 492        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 493        TokenType.SET: lambda self: self._parse_set(),
 494        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 495        TokenType.UPDATE: lambda self: self._parse_update(),
 496        TokenType.USE: lambda self: self.expression(
 497            exp.Use,
 498            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 499            and exp.Var(this=self._prev.text),
 500            this=self._parse_table(schema=False),
 501        ),
 502    }
 503
 504    UNARY_PARSERS = {
 505        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 506        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 507        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 508        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 509    }
 510
 511    PRIMARY_PARSERS = {
 512        TokenType.STRING: lambda self, token: self.expression(
 513            exp.Literal, this=token.text, is_string=True
 514        ),
 515        TokenType.NUMBER: lambda self, token: self.expression(
 516            exp.Literal, this=token.text, is_string=False
 517        ),
 518        TokenType.STAR: lambda self, _: self.expression(
 519            exp.Star,
 520            **{"except": self._parse_except(), "replace": self._parse_replace()},
 521        ),
 522        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 523        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 524        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 525        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 526        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 527        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 528        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 529        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 530            exp.National, this=token.text
 531        ),
 532        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 533        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 534    }
 535
 536    PLACEHOLDER_PARSERS = {
 537        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 538        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 539        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 540        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 541        else None,
 542    }
 543
 544    RANGE_PARSERS = {
 545        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 546        TokenType.GLOB: binary_range_parser(exp.Glob),
 547        TokenType.ILIKE: binary_range_parser(exp.ILike),
 548        TokenType.IN: lambda self, this: self._parse_in(this),
 549        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 550        TokenType.IS: lambda self, this: self._parse_is(this),
 551        TokenType.LIKE: binary_range_parser(exp.Like),
 552        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 553        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 554        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 555    }
 556
 557    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 558        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 559        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 560        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 561        "CHARACTER SET": lambda self: self._parse_character_set(),
 562        "CHECKSUM": lambda self: self._parse_checksum(),
 563        "CLUSTER": lambda self: self._parse_cluster(),
 564        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 565        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 566        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 567        "DEFINER": lambda self: self._parse_definer(),
 568        "DETERMINISTIC": lambda self: self.expression(
 569            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 570        ),
 571        "DISTKEY": lambda self: self._parse_distkey(),
 572        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 573        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 574        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 575        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 576        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 577        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 578        "FREESPACE": lambda self: self._parse_freespace(),
 579        "IMMUTABLE": lambda self: self.expression(
 580            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 581        ),
 582        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 583        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 584        "LIKE": lambda self: self._parse_create_like(),
 585        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 586        "LOCK": lambda self: self._parse_locking(),
 587        "LOCKING": lambda self: self._parse_locking(),
 588        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 589        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 590        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 591        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 592        "NO": lambda self: self._parse_no_property(),
 593        "ON": lambda self: self._parse_on_property(),
 594        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 595        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 596        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 597        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 598        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 599        "RETURNS": lambda self: self._parse_returns(),
 600        "ROW": lambda self: self._parse_row(),
 601        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 602        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 603        "SETTINGS": lambda self: self.expression(
 604            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 605        ),
 606        "SORTKEY": lambda self: self._parse_sortkey(),
 607        "STABLE": lambda self: self.expression(
 608            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 609        ),
 610        "STORED": lambda self: self._parse_stored(),
 611        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 612        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 613        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 614        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 615        "TTL": lambda self: self._parse_ttl(),
 616        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 617        "VOLATILE": lambda self: self._parse_volatile_property(),
 618        "WITH": lambda self: self._parse_with_property(),
 619    }
 620
 621    CONSTRAINT_PARSERS = {
 622        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 623        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 624        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 625        "CHARACTER SET": lambda self: self.expression(
 626            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 627        ),
 628        "CHECK": lambda self: self.expression(
 629            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 630        ),
 631        "COLLATE": lambda self: self.expression(
 632            exp.CollateColumnConstraint, this=self._parse_var()
 633        ),
 634        "COMMENT": lambda self: self.expression(
 635            exp.CommentColumnConstraint, this=self._parse_string()
 636        ),
 637        "COMPRESS": lambda self: self._parse_compress(),
 638        "DEFAULT": lambda self: self.expression(
 639            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 640        ),
 641        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 642        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 643        "FORMAT": lambda self: self.expression(
 644            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 645        ),
 646        "GENERATED": lambda self: self._parse_generated_as_identity(),
 647        "IDENTITY": lambda self: self._parse_auto_increment(),
 648        "INLINE": lambda self: self._parse_inline(),
 649        "LIKE": lambda self: self._parse_create_like(),
 650        "NOT": lambda self: self._parse_not_constraint(),
 651        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 652        "ON": lambda self: self._match(TokenType.UPDATE)
 653        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 654        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 655        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 656        "REFERENCES": lambda self: self._parse_references(match=False),
 657        "TITLE": lambda self: self.expression(
 658            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 659        ),
 660        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 661        "UNIQUE": lambda self: self._parse_unique(),
 662        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 663    }
 664
 665    ALTER_PARSERS = {
 666        "ADD": lambda self: self._parse_alter_table_add(),
 667        "ALTER": lambda self: self._parse_alter_table_alter(),
 668        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 669        "DROP": lambda self: self._parse_alter_table_drop(),
 670        "RENAME": lambda self: self._parse_alter_table_rename(),
 671    }
 672
 673    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 674
 675    NO_PAREN_FUNCTION_PARSERS = {
 676        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 677        TokenType.CASE: lambda self: self._parse_case(),
 678        TokenType.IF: lambda self: self._parse_if(),
 679        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 680            exp.NextValueFor,
 681            this=self._parse_column(),
 682            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 683        ),
 684    }
 685
 686    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 687
 688    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 689        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 690        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 691        "DECODE": lambda self: self._parse_decode(),
 692        "EXTRACT": lambda self: self._parse_extract(),
 693        "JSON_OBJECT": lambda self: self._parse_json_object(),
 694        "LOG": lambda self: self._parse_logarithm(),
 695        "MATCH": lambda self: self._parse_match_against(),
 696        "OPENJSON": lambda self: self._parse_open_json(),
 697        "POSITION": lambda self: self._parse_position(),
 698        "SAFE_CAST": lambda self: self._parse_cast(False),
 699        "STRING_AGG": lambda self: self._parse_string_agg(),
 700        "SUBSTRING": lambda self: self._parse_substring(),
 701        "TRIM": lambda self: self._parse_trim(),
 702        "TRY_CAST": lambda self: self._parse_cast(False),
 703        "TRY_CONVERT": lambda self: self._parse_convert(False),
 704    }
 705
 706    QUERY_MODIFIER_PARSERS = {
 707        "joins": lambda self: list(iter(self._parse_join, None)),
 708        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 709        "match": lambda self: self._parse_match_recognize(),
 710        "where": lambda self: self._parse_where(),
 711        "group": lambda self: self._parse_group(),
 712        "having": lambda self: self._parse_having(),
 713        "qualify": lambda self: self._parse_qualify(),
 714        "windows": lambda self: self._parse_window_clause(),
 715        "order": lambda self: self._parse_order(),
 716        "limit": lambda self: self._parse_limit(),
 717        "offset": lambda self: self._parse_offset(),
 718        "locks": lambda self: self._parse_locks(),
 719        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 720    }
 721
 722    SET_PARSERS = {
 723        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 724        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 725        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 726        "TRANSACTION": lambda self: self._parse_set_transaction(),
 727    }
 728
 729    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 730
 731    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 732
 733    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 734
 735    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 736
 737    TRANSACTION_CHARACTERISTICS = {
 738        "ISOLATION LEVEL REPEATABLE READ",
 739        "ISOLATION LEVEL READ COMMITTED",
 740        "ISOLATION LEVEL READ UNCOMMITTED",
 741        "ISOLATION LEVEL SERIALIZABLE",
 742        "READ WRITE",
 743        "READ ONLY",
 744    }
 745
 746    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 747
 748    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 749
 750    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 751    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 752    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
 753
 754    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 755
 756    STRICT_CAST = True
 757
 758    CONVERT_TYPE_FIRST = False
 759
 760    PREFIXED_PIVOT_COLUMNS = False
 761    IDENTIFY_PIVOT_STRINGS = False
 762
 763    LOG_BASE_FIRST = True
 764    LOG_DEFAULTS_TO_LN = False
 765
 766    __slots__ = (
 767        "error_level",
 768        "error_message_context",
 769        "sql",
 770        "errors",
 771        "index_offset",
 772        "unnest_column_only",
 773        "alias_post_tablesample",
 774        "max_errors",
 775        "null_ordering",
 776        "_tokens",
 777        "_index",
 778        "_curr",
 779        "_next",
 780        "_prev",
 781        "_prev_comments",
 782        "_show_trie",
 783        "_set_trie",
 784    )
 785
 786    def __init__(
 787        self,
 788        error_level: t.Optional[ErrorLevel] = None,
 789        error_message_context: int = 100,
 790        index_offset: int = 0,
 791        unnest_column_only: bool = False,
 792        alias_post_tablesample: bool = False,
 793        max_errors: int = 3,
 794        null_ordering: t.Optional[str] = None,
 795    ):
 796        self.error_level = error_level or ErrorLevel.IMMEDIATE
 797        self.error_message_context = error_message_context
 798        self.index_offset = index_offset
 799        self.unnest_column_only = unnest_column_only
 800        self.alias_post_tablesample = alias_post_tablesample
 801        self.max_errors = max_errors
 802        self.null_ordering = null_ordering
 803        self.reset()
 804
 805    def reset(self):
 806        self.sql = ""
 807        self.errors = []
 808        self._tokens = []
 809        self._index = 0
 810        self._curr = None
 811        self._next = None
 812        self._prev = None
 813        self._prev_comments = None
 814
 815    def parse(
 816        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 817    ) -> t.List[t.Optional[exp.Expression]]:
 818        """
 819        Parses a list of tokens and returns a list of syntax trees, one tree
 820        per parsed SQL statement.
 821
 822        Args:
 823            raw_tokens: the list of tokens.
 824            sql: the original SQL string, used to produce helpful debug messages.
 825
 826        Returns:
 827            The list of syntax trees.
 828        """
 829        return self._parse(
 830            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 831        )
 832
 833    def parse_into(
 834        self,
 835        expression_types: exp.IntoType,
 836        raw_tokens: t.List[Token],
 837        sql: t.Optional[str] = None,
 838    ) -> t.List[t.Optional[exp.Expression]]:
 839        """
 840        Parses a list of tokens into a given Expression type. If a collection of Expression
 841        types is given instead, this method will try to parse the token list into each one
 842        of them, stopping at the first for which the parsing succeeds.
 843
 844        Args:
 845            expression_types: the expression type(s) to try and parse the token list into.
 846            raw_tokens: the list of tokens.
 847            sql: the original SQL string, used to produce helpful debug messages.
 848
 849        Returns:
 850            The target Expression.
 851        """
 852        errors = []
 853        for expression_type in ensure_collection(expression_types):
 854            parser = self.EXPRESSION_PARSERS.get(expression_type)
 855            if not parser:
 856                raise TypeError(f"No parser registered for {expression_type}")
 857            try:
 858                return self._parse(parser, raw_tokens, sql)
 859            except ParseError as e:
 860                e.errors[0]["into_expression"] = expression_type
 861                errors.append(e)
 862        raise ParseError(
 863            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 864            errors=merge_errors(errors),
 865        ) from errors[-1]
 866
 867    def _parse(
 868        self,
 869        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 870        raw_tokens: t.List[Token],
 871        sql: t.Optional[str] = None,
 872    ) -> t.List[t.Optional[exp.Expression]]:
 873        self.reset()
 874        self.sql = sql or ""
 875        total = len(raw_tokens)
 876        chunks: t.List[t.List[Token]] = [[]]
 877
 878        for i, token in enumerate(raw_tokens):
 879            if token.token_type == TokenType.SEMICOLON:
 880                if i < total - 1:
 881                    chunks.append([])
 882            else:
 883                chunks[-1].append(token)
 884
 885        expressions = []
 886
 887        for tokens in chunks:
 888            self._index = -1
 889            self._tokens = tokens
 890            self._advance()
 891
 892            expressions.append(parse_method(self))
 893
 894            if self._index < len(self._tokens):
 895                self.raise_error("Invalid expression / Unexpected token")
 896
 897            self.check_errors()
 898
 899        return expressions
 900
 901    def check_errors(self) -> None:
 902        """
 903        Logs or raises any found errors, depending on the chosen error level setting.
 904        """
 905        if self.error_level == ErrorLevel.WARN:
 906            for error in self.errors:
 907                logger.error(str(error))
 908        elif self.error_level == ErrorLevel.RAISE and self.errors:
 909            raise ParseError(
 910                concat_messages(self.errors, self.max_errors),
 911                errors=merge_errors(self.errors),
 912            )
 913
 914    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 915        """
 916        Appends an error in the list of recorded errors or raises it, depending on the chosen
 917        error level setting.
 918        """
 919        token = token or self._curr or self._prev or Token.string("")
 920        start = token.start
 921        end = token.end + 1
 922        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 923        highlight = self.sql[start:end]
 924        end_context = self.sql[end : end + self.error_message_context]
 925
 926        error = ParseError.new(
 927            f"{message}. Line {token.line}, Col: {token.col}.\n"
 928            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 929            description=message,
 930            line=token.line,
 931            col=token.col,
 932            start_context=start_context,
 933            highlight=highlight,
 934            end_context=end_context,
 935        )
 936
 937        if self.error_level == ErrorLevel.IMMEDIATE:
 938            raise error
 939
 940        self.errors.append(error)
 941
 942    def expression(
 943        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 944    ) -> E:
 945        """
 946        Creates a new, validated Expression.
 947
 948        Args:
 949            exp_class: the expression class to instantiate.
 950            comments: an optional list of comments to attach to the expression.
 951            kwargs: the arguments to set for the expression along with their respective values.
 952
 953        Returns:
 954            The target expression.
 955        """
 956        instance = exp_class(**kwargs)
 957        instance.add_comments(comments) if comments else self._add_comments(instance)
 958        self.validate_expression(instance)
 959        return instance
 960
 961    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 962        if expression and self._prev_comments:
 963            expression.add_comments(self._prev_comments)
 964            self._prev_comments = None
 965
 966    def validate_expression(
 967        self, expression: exp.Expression, args: t.Optional[t.List] = None
 968    ) -> None:
 969        """
 970        Validates an already instantiated expression, making sure that all its mandatory arguments
 971        are set.
 972
 973        Args:
 974            expression: the expression to validate.
 975            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 976        """
 977        if self.error_level == ErrorLevel.IGNORE:
 978            return
 979
 980        for error_message in expression.error_messages(args):
 981            self.raise_error(error_message)
 982
 983    def _find_sql(self, start: Token, end: Token) -> str:
 984        return self.sql[start.start : end.end + 1]
 985
 986    def _advance(self, times: int = 1) -> None:
 987        self._index += times
 988        self._curr = seq_get(self._tokens, self._index)
 989        self._next = seq_get(self._tokens, self._index + 1)
 990        if self._index > 0:
 991            self._prev = self._tokens[self._index - 1]
 992            self._prev_comments = self._prev.comments
 993        else:
 994            self._prev = None
 995            self._prev_comments = None
 996
 997    def _retreat(self, index: int) -> None:
 998        if index != self._index:
 999            self._advance(index - self._index)
1000
1001    def _parse_command(self) -> exp.Command:
1002        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1003
1004    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1005        start = self._prev
1006        exists = self._parse_exists() if allow_exists else None
1007
1008        self._match(TokenType.ON)
1009
1010        kind = self._match_set(self.CREATABLES) and self._prev
1011
1012        if not kind:
1013            return self._parse_as_command(start)
1014
1015        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1016            this = self._parse_user_defined_function(kind=kind.token_type)
1017        elif kind.token_type == TokenType.TABLE:
1018            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1019        elif kind.token_type == TokenType.COLUMN:
1020            this = self._parse_column()
1021        else:
1022            this = self._parse_id_var()
1023
1024        self._match(TokenType.IS)
1025
1026        return self.expression(
1027            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1028        )
1029
1030    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1031    def _parse_ttl(self) -> exp.Expression:
1032        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1033            this = self._parse_bitwise()
1034
1035            if self._match_text_seq("DELETE"):
1036                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1037            if self._match_text_seq("RECOMPRESS"):
1038                return self.expression(
1039                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1040                )
1041            if self._match_text_seq("TO", "DISK"):
1042                return self.expression(
1043                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1044                )
1045            if self._match_text_seq("TO", "VOLUME"):
1046                return self.expression(
1047                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1048                )
1049
1050            return this
1051
1052        expressions = self._parse_csv(_parse_ttl_action)
1053        where = self._parse_where()
1054        group = self._parse_group()
1055
1056        aggregates = None
1057        if group and self._match(TokenType.SET):
1058            aggregates = self._parse_csv(self._parse_set_item)
1059
1060        return self.expression(
1061            exp.MergeTreeTTL,
1062            expressions=expressions,
1063            where=where,
1064            group=group,
1065            aggregates=aggregates,
1066        )
1067
1068    def _parse_statement(self) -> t.Optional[exp.Expression]:
1069        if self._curr is None:
1070            return None
1071
1072        if self._match_set(self.STATEMENT_PARSERS):
1073            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1074
1075        if self._match_set(Tokenizer.COMMANDS):
1076            return self._parse_command()
1077
1078        expression = self._parse_expression()
1079        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1080        return self._parse_query_modifiers(expression)
1081
1082    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1083        start = self._prev
1084        temporary = self._match(TokenType.TEMPORARY)
1085        materialized = self._match_text_seq("MATERIALIZED")
1086        kind = self._match_set(self.CREATABLES) and self._prev.text
1087        if not kind:
1088            return self._parse_as_command(start)
1089
1090        return self.expression(
1091            exp.Drop,
1092            exists=self._parse_exists(),
1093            this=self._parse_table(schema=True),
1094            kind=kind,
1095            temporary=temporary,
1096            materialized=materialized,
1097            cascade=self._match_text_seq("CASCADE"),
1098            constraints=self._match_text_seq("CONSTRAINTS"),
1099            purge=self._match_text_seq("PURGE"),
1100        )
1101
1102    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1103        return (
1104            self._match(TokenType.IF)
1105            and (not not_ or self._match(TokenType.NOT))
1106            and self._match(TokenType.EXISTS)
1107        )
1108
1109    def _parse_create(self) -> t.Optional[exp.Expression]:
1110        start = self._prev
1111        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1112            TokenType.OR, TokenType.REPLACE
1113        )
1114        unique = self._match(TokenType.UNIQUE)
1115
1116        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1117            self._match(TokenType.TABLE)
1118
1119        properties = None
1120        create_token = self._match_set(self.CREATABLES) and self._prev
1121
1122        if not create_token:
1123            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1124            create_token = self._match_set(self.CREATABLES) and self._prev
1125
1126            if not properties or not create_token:
1127                return self._parse_as_command(start)
1128
1129        exists = self._parse_exists(not_=True)
1130        this = None
1131        expression = None
1132        indexes = None
1133        no_schema_binding = None
1134        begin = None
1135        clone = None
1136
1137        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1138            this = self._parse_user_defined_function(kind=create_token.token_type)
1139            temp_properties = self._parse_properties()
1140            if properties and temp_properties:
1141                properties.expressions.extend(temp_properties.expressions)
1142            elif temp_properties:
1143                properties = temp_properties
1144
1145            self._match(TokenType.ALIAS)
1146            begin = self._match(TokenType.BEGIN)
1147            return_ = self._match_text_seq("RETURN")
1148            expression = self._parse_statement()
1149
1150            if return_:
1151                expression = self.expression(exp.Return, this=expression)
1152        elif create_token.token_type == TokenType.INDEX:
1153            this = self._parse_index(index=self._parse_id_var())
1154        elif create_token.token_type in self.DB_CREATABLES:
1155            table_parts = self._parse_table_parts(schema=True)
1156
1157            # exp.Properties.Location.POST_NAME
1158            if self._match(TokenType.COMMA):
1159                temp_properties = self._parse_properties(before=True)
1160                if properties and temp_properties:
1161                    properties.expressions.extend(temp_properties.expressions)
1162                elif temp_properties:
1163                    properties = temp_properties
1164
1165            this = self._parse_schema(this=table_parts)
1166
1167            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1168            temp_properties = self._parse_properties()
1169            if properties and temp_properties:
1170                properties.expressions.extend(temp_properties.expressions)
1171            elif temp_properties:
1172                properties = temp_properties
1173
1174            self._match(TokenType.ALIAS)
1175
1176            # exp.Properties.Location.POST_ALIAS
1177            if not (
1178                self._match(TokenType.SELECT, advance=False)
1179                or self._match(TokenType.WITH, advance=False)
1180                or self._match(TokenType.L_PAREN, advance=False)
1181            ):
1182                temp_properties = self._parse_properties()
1183                if properties and temp_properties:
1184                    properties.expressions.extend(temp_properties.expressions)
1185                elif temp_properties:
1186                    properties = temp_properties
1187
1188            expression = self._parse_ddl_select()
1189
1190            if create_token.token_type == TokenType.TABLE:
1191                indexes = []
1192                while True:
1193                    index = self._parse_index()
1194
1195                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1196                    temp_properties = self._parse_properties()
1197                    if properties and temp_properties:
1198                        properties.expressions.extend(temp_properties.expressions)
1199                    elif temp_properties:
1200                        properties = temp_properties
1201
1202                    if not index:
1203                        break
1204                    else:
1205                        self._match(TokenType.COMMA)
1206                        indexes.append(index)
1207            elif create_token.token_type == TokenType.VIEW:
1208                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1209                    no_schema_binding = True
1210
1211            if self._match_text_seq("CLONE"):
1212                clone = self._parse_table(schema=True)
1213                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1214                clone_kind = (
1215                    self._match(TokenType.L_PAREN)
1216                    and self._match_texts(self.CLONE_KINDS)
1217                    and self._prev.text.upper()
1218                )
1219                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1220                self._match(TokenType.R_PAREN)
1221                clone = self.expression(
1222                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1223                )
1224
1225        return self.expression(
1226            exp.Create,
1227            this=this,
1228            kind=create_token.text,
1229            replace=replace,
1230            unique=unique,
1231            expression=expression,
1232            exists=exists,
1233            properties=properties,
1234            indexes=indexes,
1235            no_schema_binding=no_schema_binding,
1236            begin=begin,
1237            clone=clone,
1238        )
1239
1240    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1241        # only used for teradata currently
1242        self._match(TokenType.COMMA)
1243
1244        kwargs = {
1245            "no": self._match_text_seq("NO"),
1246            "dual": self._match_text_seq("DUAL"),
1247            "before": self._match_text_seq("BEFORE"),
1248            "default": self._match_text_seq("DEFAULT"),
1249            "local": (self._match_text_seq("LOCAL") and "LOCAL")
1250            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1251            "after": self._match_text_seq("AFTER"),
1252            "minimum": self._match_texts(("MIN", "MINIMUM")),
1253            "maximum": self._match_texts(("MAX", "MAXIMUM")),
1254        }
1255
1256        if self._match_texts(self.PROPERTY_PARSERS):
1257            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1258            try:
1259                return parser(self, **{k: v for k, v in kwargs.items() if v})
1260            except TypeError:
1261                self.raise_error(f"Cannot parse property '{self._prev.text}'")
1262
1263        return None
1264
1265    def _parse_property(self) -> t.Optional[exp.Expression]:
1266        if self._match_texts(self.PROPERTY_PARSERS):
1267            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1268
1269        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1270            return self._parse_character_set(default=True)
1271
1272        if self._match_text_seq("COMPOUND", "SORTKEY"):
1273            return self._parse_sortkey(compound=True)
1274
1275        if self._match_text_seq("SQL", "SECURITY"):
1276            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1277
1278        assignment = self._match_pair(
1279            TokenType.VAR, TokenType.EQ, advance=False
1280        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1281
1282        if assignment:
1283            key = self._parse_var_or_string()
1284            self._match(TokenType.EQ)
1285            return self.expression(exp.Property, this=key, value=self._parse_column())
1286
1287        return None
1288
1289    def _parse_stored(self) -> exp.Expression:
1290        self._match(TokenType.ALIAS)
1291
1292        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1293        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1294
1295        return self.expression(
1296            exp.FileFormatProperty,
1297            this=self.expression(
1298                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1299            )
1300            if input_format or output_format
1301            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1302        )
1303
1304    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1305        self._match(TokenType.EQ)
1306        self._match(TokenType.ALIAS)
1307        return self.expression(exp_class, this=self._parse_field())
1308
1309    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1310        properties = []
1311
1312        while True:
1313            if before:
1314                prop = self._parse_property_before()
1315            else:
1316                prop = self._parse_property()
1317
1318            if not prop:
1319                break
1320            for p in ensure_list(prop):
1321                properties.append(p)
1322
1323        if properties:
1324            return self.expression(exp.Properties, expressions=properties)
1325
1326        return None
1327
1328    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1329        return self.expression(
1330            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1331        )
1332
1333    def _parse_volatile_property(self) -> exp.Expression:
1334        if self._index >= 2:
1335            pre_volatile_token = self._tokens[self._index - 2]
1336        else:
1337            pre_volatile_token = None
1338
1339        if pre_volatile_token and pre_volatile_token.token_type in (
1340            TokenType.CREATE,
1341            TokenType.REPLACE,
1342            TokenType.UNIQUE,
1343        ):
1344            return exp.VolatileProperty()
1345
1346        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1347
1348    def _parse_with_property(
1349        self,
1350    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1351        self._match(TokenType.WITH)
1352        if self._match(TokenType.L_PAREN, advance=False):
1353            return self._parse_wrapped_csv(self._parse_property)
1354
1355        if self._match_text_seq("JOURNAL"):
1356            return self._parse_withjournaltable()
1357
1358        if self._match_text_seq("DATA"):
1359            return self._parse_withdata(no=False)
1360        elif self._match_text_seq("NO", "DATA"):
1361            return self._parse_withdata(no=True)
1362
1363        if not self._next:
1364            return None
1365
1366        return self._parse_withisolatedloading()
1367
1368    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1369    def _parse_definer(self) -> t.Optional[exp.Expression]:
1370        self._match(TokenType.EQ)
1371
1372        user = self._parse_id_var()
1373        self._match(TokenType.PARAMETER)
1374        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1375
1376        if not user or not host:
1377            return None
1378
1379        return exp.DefinerProperty(this=f"{user}@{host}")
1380
1381    def _parse_withjournaltable(self) -> exp.Expression:
1382        self._match(TokenType.TABLE)
1383        self._match(TokenType.EQ)
1384        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1385
1386    def _parse_log(self, no: bool = False) -> exp.Expression:
1387        return self.expression(exp.LogProperty, no=no)
1388
1389    def _parse_journal(self, **kwargs) -> exp.Expression:
1390        return self.expression(exp.JournalProperty, **kwargs)
1391
1392    def _parse_checksum(self) -> exp.Expression:
1393        self._match(TokenType.EQ)
1394
1395        on = None
1396        if self._match(TokenType.ON):
1397            on = True
1398        elif self._match_text_seq("OFF"):
1399            on = False
1400        default = self._match(TokenType.DEFAULT)
1401
1402        return self.expression(
1403            exp.ChecksumProperty,
1404            on=on,
1405            default=default,
1406        )
1407
1408    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1409        if not self._match_text_seq("BY"):
1410            self._retreat(self._index - 1)
1411            return None
1412        return self.expression(
1413            exp.Cluster,
1414            expressions=self._parse_csv(self._parse_ordered),
1415        )
1416
1417    def _parse_freespace(self) -> exp.Expression:
1418        self._match(TokenType.EQ)
1419        return self.expression(
1420            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1421        )
1422
1423    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1424        if self._match(TokenType.EQ):
1425            return self.expression(
1426                exp.MergeBlockRatioProperty,
1427                this=self._parse_number(),
1428                percent=self._match(TokenType.PERCENT),
1429            )
1430        return self.expression(
1431            exp.MergeBlockRatioProperty,
1432            no=no,
1433            default=default,
1434        )
1435
1436    def _parse_datablocksize(
1437        self,
1438        default: t.Optional[bool] = None,
1439        minimum: t.Optional[bool] = None,
1440        maximum: t.Optional[bool] = None,
1441    ) -> exp.Expression:
1442        self._match(TokenType.EQ)
1443        size = self._parse_number()
1444        units = None
1445        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1446            units = self._prev.text
1447        return self.expression(
1448            exp.DataBlocksizeProperty,
1449            size=size,
1450            units=units,
1451            default=default,
1452            minimum=minimum,
1453            maximum=maximum,
1454        )
1455
1456    def _parse_blockcompression(self) -> exp.Expression:
1457        self._match(TokenType.EQ)
1458        always = self._match_text_seq("ALWAYS")
1459        manual = self._match_text_seq("MANUAL")
1460        never = self._match_text_seq("NEVER")
1461        default = self._match_text_seq("DEFAULT")
1462        autotemp = None
1463        if self._match_text_seq("AUTOTEMP"):
1464            autotemp = self._parse_schema()
1465
1466        return self.expression(
1467            exp.BlockCompressionProperty,
1468            always=always,
1469            manual=manual,
1470            never=never,
1471            default=default,
1472            autotemp=autotemp,
1473        )
1474
1475    def _parse_withisolatedloading(self) -> exp.Expression:
1476        no = self._match_text_seq("NO")
1477        concurrent = self._match_text_seq("CONCURRENT")
1478        self._match_text_seq("ISOLATED", "LOADING")
1479        for_all = self._match_text_seq("FOR", "ALL")
1480        for_insert = self._match_text_seq("FOR", "INSERT")
1481        for_none = self._match_text_seq("FOR", "NONE")
1482        return self.expression(
1483            exp.IsolatedLoadingProperty,
1484            no=no,
1485            concurrent=concurrent,
1486            for_all=for_all,
1487            for_insert=for_insert,
1488            for_none=for_none,
1489        )
1490
1491    def _parse_locking(self) -> exp.Expression:
1492        if self._match(TokenType.TABLE):
1493            kind = "TABLE"
1494        elif self._match(TokenType.VIEW):
1495            kind = "VIEW"
1496        elif self._match(TokenType.ROW):
1497            kind = "ROW"
1498        elif self._match_text_seq("DATABASE"):
1499            kind = "DATABASE"
1500        else:
1501            kind = None
1502
1503        if kind in ("DATABASE", "TABLE", "VIEW"):
1504            this = self._parse_table_parts()
1505        else:
1506            this = None
1507
1508        if self._match(TokenType.FOR):
1509            for_or_in = "FOR"
1510        elif self._match(TokenType.IN):
1511            for_or_in = "IN"
1512        else:
1513            for_or_in = None
1514
1515        if self._match_text_seq("ACCESS"):
1516            lock_type = "ACCESS"
1517        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1518            lock_type = "EXCLUSIVE"
1519        elif self._match_text_seq("SHARE"):
1520            lock_type = "SHARE"
1521        elif self._match_text_seq("READ"):
1522            lock_type = "READ"
1523        elif self._match_text_seq("WRITE"):
1524            lock_type = "WRITE"
1525        elif self._match_text_seq("CHECKSUM"):
1526            lock_type = "CHECKSUM"
1527        else:
1528            lock_type = None
1529
1530        override = self._match_text_seq("OVERRIDE")
1531
1532        return self.expression(
1533            exp.LockingProperty,
1534            this=this,
1535            kind=kind,
1536            for_or_in=for_or_in,
1537            lock_type=lock_type,
1538            override=override,
1539        )
1540
1541    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1542        if self._match(TokenType.PARTITION_BY):
1543            return self._parse_csv(self._parse_conjunction)
1544        return []
1545
1546    def _parse_partitioned_by(self) -> exp.Expression:
1547        self._match(TokenType.EQ)
1548        return self.expression(
1549            exp.PartitionedByProperty,
1550            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1551        )
1552
1553    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1554        if self._match_text_seq("AND", "STATISTICS"):
1555            statistics = True
1556        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1557            statistics = False
1558        else:
1559            statistics = None
1560
1561        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1562
1563    def _parse_no_property(self) -> t.Optional[exp.Property]:
1564        if self._match_text_seq("PRIMARY", "INDEX"):
1565            return exp.NoPrimaryIndexProperty()
1566        return None
1567
1568    def _parse_on_property(self) -> t.Optional[exp.Property]:
1569        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1570            return exp.OnCommitProperty()
1571        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1572            return exp.OnCommitProperty(delete=True)
1573        return None
1574
1575    def _parse_distkey(self) -> exp.Expression:
1576        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1577
1578    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1579        table = self._parse_table(schema=True)
1580        options = []
1581        while self._match_texts(("INCLUDING", "EXCLUDING")):
1582            this = self._prev.text.upper()
1583            id_var = self._parse_id_var()
1584
1585            if not id_var:
1586                return None
1587
1588            options.append(
1589                self.expression(
1590                    exp.Property,
1591                    this=this,
1592                    value=exp.Var(this=id_var.this.upper()),
1593                )
1594            )
1595        return self.expression(exp.LikeProperty, this=table, expressions=options)
1596
1597    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1598        return self.expression(
1599            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1600        )
1601
1602    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1603        self._match(TokenType.EQ)
1604        return self.expression(
1605            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1606        )
1607
1608    def _parse_returns(self) -> exp.Expression:
1609        value: t.Optional[exp.Expression]
1610        is_table = self._match(TokenType.TABLE)
1611
1612        if is_table:
1613            if self._match(TokenType.LT):
1614                value = self.expression(
1615                    exp.Schema,
1616                    this="TABLE",
1617                    expressions=self._parse_csv(self._parse_struct_types),
1618                )
1619                if not self._match(TokenType.GT):
1620                    self.raise_error("Expecting >")
1621            else:
1622                value = self._parse_schema(exp.Var(this="TABLE"))
1623        else:
1624            value = self._parse_types()
1625
1626        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1627
1628    def _parse_describe(self) -> exp.Expression:
1629        kind = self._match_set(self.CREATABLES) and self._prev.text
1630        this = self._parse_table()
1631
1632        return self.expression(exp.Describe, this=this, kind=kind)
1633
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement.

        Supports `INSERT [OVERWRITE] [OR <alternative>] INTO [TABLE] <table>`
        as well as Hive's `INSERT ... [LOCAL] DIRECTORY '<path>'` target form.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: write query results to a filesystem directory instead of a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR <alternative> (see INSERT_ALTERNATIVES, e.g. REPLACE/IGNORE)
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1665
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...` following an INSERT.

        Returns None when neither prefix is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key expressions.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1699
1700    def _parse_returning(self) -> t.Optional[exp.Expression]:
1701        if not self._match(TokenType.RETURNING):
1702            return None
1703
1704        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1705
1706    def _parse_row(self) -> t.Optional[exp.Expression]:
1707        if not self._match(TokenType.FORMAT):
1708            return None
1709        return self._parse_row_format()
1710
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive ROW FORMAT clause: SERDE '<class>' or DELIMITED <sub-clauses>.

        Args:
            match_row: when True, require the leading ROW FORMAT token pair and
                return None if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each delimiter sub-clause is optional, but they must appear in this order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1736
    def _parse_load(self) -> exp.Expression:
        """Parse Hive's `LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...`;
        any other LOAD statement is preserved as an opaque Command.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1756
1757    def _parse_delete(self) -> exp.Expression:
1758        self._match(TokenType.FROM)
1759
1760        return self.expression(
1761            exp.Delete,
1762            this=self._parse_table(),
1763            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1764            where=self._parse_where(),
1765            returning=self._parse_returning(),
1766        )
1767
1768    def _parse_update(self) -> exp.Expression:
1769        return self.expression(
1770            exp.Update,
1771            **{  # type: ignore
1772                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1773                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1774                "from": self._parse_from(modifiers=True),
1775                "where": self._parse_where(),
1776                "returning": self._parse_returning(),
1777            },
1778        )
1779
1780    def _parse_uncache(self) -> exp.Expression:
1781        if not self._match(TokenType.TABLE):
1782            self.raise_error("Expecting TABLE after UNCACHE")
1783
1784        return self.expression(
1785            exp.Uncache,
1786            exists=self._parse_exists(),
1787            this=self._parse_table(schema=True),
1788        )
1789
1790    def _parse_cache(self) -> exp.Expression:
1791        lazy = self._match_text_seq("LAZY")
1792        self._match(TokenType.TABLE)
1793        table = self._parse_table(schema=True)
1794        options = []
1795
1796        if self._match_text_seq("OPTIONS"):
1797            self._match_l_paren()
1798            k = self._parse_string()
1799            self._match(TokenType.EQ)
1800            v = self._parse_string()
1801            options = [k, v]
1802            self._match_r_paren()
1803
1804        self._match(TokenType.ALIAS)
1805        return self.expression(
1806            exp.Cache,
1807            this=table,
1808            lazy=lazy,
1809            options=options,
1810            expression=self._parse_select(nested=True),
1811        )
1812
1813    def _parse_partition(self) -> t.Optional[exp.Expression]:
1814        if not self._match(TokenType.PARTITION):
1815            return None
1816
1817        return self.expression(
1818            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1819        )
1820
1821    def _parse_value(self) -> exp.Expression:
1822        if self._match(TokenType.L_PAREN):
1823            expressions = self._parse_csv(self._parse_conjunction)
1824            self._match_r_paren()
1825            return self.expression(exp.Tuple, expressions=expressions)
1826
1827        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1828        # Source: https://prestodb.io/docs/current/sql/values.html
1829        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1830
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Dispatches on the leading token: WITH (CTEs), SELECT, a parenthesized
        subquery/table, VALUES, simplified PIVOT, or a bare FROM. Returns None
        when none of these match; the result is always run through
        `_parse_set_operations` to pick up trailing UNION/INTERSECT/EXCEPT.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # NOTE(review): only reached if raise_error does not raise — confirm
                # behavior under lenient error levels.
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE style kind marker
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            # FROM-first syntax: `FROM tbl` is treated as `SELECT * FROM tbl`
            this = exp.select("*").from_(t.cast(exp.From, self._parse_from(skip_from_token=True)))
        else:
            this = None

        return self._parse_set_operations(this)
1914
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH [RECURSIVE] clause and its comma-separated CTEs.

        Args:
            skip_with_token: when True, assume WITH was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue on a comma; a WITH token between CTEs is also tolerated
            # (the condition's _match(WITH) consumes one if present).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1934
1935    def _parse_cte(self) -> exp.Expression:
1936        alias = self._parse_table_alias()
1937        if not alias or not alias.this:
1938            self.raise_error("Expected CTE to have alias")
1939
1940        self._match(TokenType.ALIAS)
1941
1942        return self.expression(
1943            exp.CTE,
1944            this=self._parse_wrapped(self._parse_statement),
1945            alias=alias,
1946        )
1947
1948    def _parse_table_alias(
1949        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1950    ) -> t.Optional[exp.Expression]:
1951        any_token = self._match(TokenType.ALIAS)
1952        alias = (
1953            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1954            or self._parse_string_as_identifier()
1955        )
1956
1957        index = self._index
1958        if self._match(TokenType.L_PAREN):
1959            columns = self._parse_csv(self._parse_function_parameter)
1960            self._match_r_paren() if columns else self._retreat(index)
1961        else:
1962            columns = None
1963
1964        if not alias and not columns:
1965            return None
1966
1967        return self.expression(exp.TableAlias, this=alias, columns=columns)
1968
1969    def _parse_subquery(
1970        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1971    ) -> t.Optional[exp.Expression]:
1972        if not this:
1973            return None
1974        return self.expression(
1975            exp.Subquery,
1976            this=this,
1977            pivots=self._parse_pivots(),
1978            alias=self._parse_table_alias() if parse_alias else None,
1979        )
1980
1981    def _parse_query_modifiers(
1982        self, this: t.Optional[exp.Expression]
1983    ) -> t.Optional[exp.Expression]:
1984        if isinstance(this, self.MODIFIABLES):
1985            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1986                expression = parser(self)
1987
1988                if expression:
1989                    this.set(key, expression)
1990        return this
1991
1992    def _parse_hint(self) -> t.Optional[exp.Expression]:
1993        if self._match(TokenType.HINT):
1994            hints = self._parse_csv(self._parse_function)
1995            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1996                self.raise_error("Expected */ after HINT")
1997            return self.expression(exp.Hint, expressions=hints)
1998
1999        return None
2000
2001    def _parse_into(self) -> t.Optional[exp.Expression]:
2002        if not self._match(TokenType.INTO):
2003            return None
2004
2005        temp = self._match(TokenType.TEMPORARY)
2006        unlogged = self._match_text_seq("UNLOGGED")
2007        self._match(TokenType.TABLE)
2008
2009        return self.expression(
2010            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2011        )
2012
2013    def _parse_from(
2014        self, modifiers: bool = False, skip_from_token: bool = False
2015    ) -> t.Optional[exp.From]:
2016        if not skip_from_token and not self._match(TokenType.FROM):
2017            return None
2018
2019        comments = self._prev_comments
2020        this = self._parse_table()
2021
2022        return self.expression(
2023            exp.From,
2024            comments=comments,
2025            this=self._parse_query_modifiers(this) if modifiers else this,
2026        )
2027
2028    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2029        if not self._match(TokenType.MATCH_RECOGNIZE):
2030            return None
2031
2032        self._match_l_paren()
2033
2034        partition = self._parse_partition_by()
2035        order = self._parse_order()
2036        measures = (
2037            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2038        )
2039
2040        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2041            rows = exp.Var(this="ONE ROW PER MATCH")
2042        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2043            text = "ALL ROWS PER MATCH"
2044            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2045                text += f" SHOW EMPTY MATCHES"
2046            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2047                text += f" OMIT EMPTY MATCHES"
2048            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2049                text += f" WITH UNMATCHED ROWS"
2050            rows = exp.Var(this=text)
2051        else:
2052            rows = None
2053
2054        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2055            text = "AFTER MATCH SKIP"
2056            if self._match_text_seq("PAST", "LAST", "ROW"):
2057                text += f" PAST LAST ROW"
2058            elif self._match_text_seq("TO", "NEXT", "ROW"):
2059                text += f" TO NEXT ROW"
2060            elif self._match_text_seq("TO", "FIRST"):
2061                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2062            elif self._match_text_seq("TO", "LAST"):
2063                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2064            after = exp.Var(this=text)
2065        else:
2066            after = None
2067
2068        if self._match_text_seq("PATTERN"):
2069            self._match_l_paren()
2070
2071            if not self._curr:
2072                self.raise_error("Expecting )", self._curr)
2073
2074            paren = 1
2075            start = self._curr
2076
2077            while self._curr and paren > 0:
2078                if self._curr.token_type == TokenType.L_PAREN:
2079                    paren += 1
2080                if self._curr.token_type == TokenType.R_PAREN:
2081                    paren -= 1
2082                end = self._prev
2083                self._advance()
2084            if paren > 0:
2085                self.raise_error("Expecting )", self._curr)
2086            pattern = exp.Var(this=self._find_sql(start, end))
2087        else:
2088            pattern = None
2089
2090        define = (
2091            self._parse_csv(
2092                lambda: self.expression(
2093                    exp.Alias,
2094                    alias=self._parse_id_var(any_token=True),
2095                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2096                )
2097            )
2098            if self._match_text_seq("DEFINE")
2099            else None
2100        )
2101
2102        self._match_r_paren()
2103
2104        return self.expression(
2105            exp.MatchRecognize,
2106            partition_by=partition,
2107            order=order,
2108            measures=measures,
2109            rows=rows,
2110            after=after,
2111            pattern=pattern,
2112            define=define,
2113            alias=self._parse_table_alias(),
2114        )
2115
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL [VIEW] [OUTER] ..., OUTER APPLY ..., or CROSS APPLY ...;
        returns None if none of these prefixes match.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply  # OUTER APPLY maps to outer lateral semantics
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW aliases take the form `<table> [AS col1, col2, ...]`.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2157
2158    def _parse_join_side_and_kind(
2159        self,
2160    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2161        return (
2162            self._match(TokenType.NATURAL) and self._prev,
2163            self._match_set(self.JOIN_SIDES) and self._prev,
2164            self._match_set(self.JOIN_KINDS) and self._prev,
2165        )
2166
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and OUTER/CROSS APPLY.

        Args:
            skip_join_token: when True, the JOIN keyword is not required.
        """
        if self._match(TokenType.COMMA):
            # Implicit join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The natural/side/kind tokens weren't actually a join prefix; rewind.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-sided join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2210
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an index definition.

        When `index` is given (the name was parsed by the caller), expects the
        `ON [TABLE] <table>` tail; otherwise parses
        `[UNIQUE] [PRIMARY] [AMP] INDEX <name>` and returns None if the INDEX
        keyword is absent.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # presumably dialect-specific — TODO confirm
            if not self._match(TokenType.INDEX):
                return None
            index = self._parse_id_var()
            table = None

        # Optional column list: (col [ASC|DESC], ...)
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )
2247
2248    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2249        return (
2250            (not schema and self._parse_function())
2251            or self._parse_id_var(any_token=False)
2252            or self._parse_string_as_identifier()
2253            or self._parse_placeholder()
2254        )
2255
2256    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2257        catalog = None
2258        db = None
2259        table = self._parse_table_part(schema=schema)
2260
2261        while self._match(TokenType.DOT):
2262            if catalog:
2263                # This allows nesting the table in arbitrarily many dot expressions if needed
2264                table = self.expression(
2265                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2266                )
2267            else:
2268                catalog = db
2269                db = table
2270                table = self._parse_table_part(schema=schema)
2271
2272        if not table:
2273            self.raise_error(f"Expected table name but got {self._curr}")
2274
2275        return self.expression(
2276            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2277        )
2278
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: a lateral, unnest, VALUES, subquery, or a plain
        table reference with optional alias, pivots, hints, and table sample.

        Args:
            schema: parse the reference as a schema (delegates to _parse_schema).
            alias_tokens: token types permitted as alias names.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place the alias after TABLESAMPLE, others before it.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # Table hints of the form WITH (hint, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table node.
            table_sample.set("this", this)
            this = table_sample

        return this
2330
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(<exprs>) [WITH ORDINALITY] [<alias>] [WITH OFFSET [AS] <name>]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only dialects the alias names the produced column, so
            # move the parsed name from the table slot into the columns slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default column name "offset" when no explicit name is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2357
2358    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2359        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2360        if not is_derived and not self._match(TokenType.VALUES):
2361            return None
2362
2363        expressions = self._parse_csv(self._parse_value)
2364
2365        if is_derived:
2366            self._match_r_paren()
2367
2368        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2369
2370    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2371        if not self._match(TokenType.TABLE_SAMPLE) and not (
2372            as_modifier and self._match_text_seq("USING", "SAMPLE")
2373        ):
2374            return None
2375
2376        bucket_numerator = None
2377        bucket_denominator = None
2378        bucket_field = None
2379        percent = None
2380        rows = None
2381        size = None
2382        seed = None
2383
2384        kind = (
2385            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2386        )
2387        method = self._parse_var(tokens=(TokenType.ROW,))
2388
2389        self._match(TokenType.L_PAREN)
2390
2391        num = self._parse_number()
2392
2393        if self._match_text_seq("BUCKET"):
2394            bucket_numerator = self._parse_number()
2395            self._match_text_seq("OUT", "OF")
2396            bucket_denominator = bucket_denominator = self._parse_number()
2397            self._match(TokenType.ON)
2398            bucket_field = self._parse_field()
2399        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2400            percent = num
2401        elif self._match(TokenType.ROWS):
2402            rows = num
2403        else:
2404            size = num
2405
2406        self._match(TokenType.R_PAREN)
2407
2408        if self._match(TokenType.L_PAREN):
2409            method = self._parse_var()
2410            seed = self._match(TokenType.COMMA) and self._parse_number()
2411            self._match_r_paren()
2412        elif self._match_texts(("SEED", "REPEATABLE")):
2413            seed = self._parse_wrapped(self._parse_number)
2414
2415        return self.expression(
2416            exp.TableSample,
2417            method=method,
2418            bucket_numerator=bucket_numerator,
2419            bucket_denominator=bucket_denominator,
2420            bucket_field=bucket_field,
2421            percent=percent,
2422            rows=rows,
2423            size=size,
2424            seed=seed,
2425            kind=kind,
2426        )
2427
2428    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2429        return list(iter(self._parse_pivot, None))
2430
    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        """Parse DuckDB's simplified pivot syntax:
        PIVOT <table> [ON <columns>] [USING <aggregations>] [GROUP BY ...].
        """

        def _parse_on() -> t.Optional[exp.Expression]:
            # An ON column may carry an IN list restricting its pivoted values
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
2446
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT/UNPIVOT clause.

        Returns None (rewinding any consumed tokens) when the upcoming tokens
        do not actually form a pivot clause.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(" -- not a pivot clause, rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregation calls, optionally aliased
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain of pivots may take a table alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the names of the columns the pivot will produce, combining
            # each IN-list value with each aggregation's alias (order controlled
            # by PREFIXED_PIVOT_COLUMNS; quoting by IDENTIFY_PIVOT_STRINGS)
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2506
2507    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2508        return [agg.alias for agg in aggregations]
2509
2510    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2511        if not skip_where_token and not self._match(TokenType.WHERE):
2512            return None
2513
2514        return self.expression(
2515            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2516        )
2517
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS modifiers, which may appear in any mix.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each kind of grouping element across loop iterations
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes (col, ...)
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                # Same shape as ROLLUP: either WITH CUBE or CUBE (col, ...)
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping while modifiers were found; plain expressions alone
            # terminate the loop
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2554
2555    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2556        if not self._match(TokenType.GROUPING_SETS):
2557            return None
2558
2559        return self._parse_wrapped_csv(self._parse_grouping_set)
2560
2561    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2562        if self._match(TokenType.L_PAREN):
2563            grouping_set = self._parse_csv(self._parse_column)
2564            self._match_r_paren()
2565            return self.expression(exp.Tuple, expressions=grouping_set)
2566
2567        return self._parse_column()
2568
2569    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2570        if not skip_having_token and not self._match(TokenType.HAVING):
2571            return None
2572        return self.expression(exp.Having, this=self._parse_conjunction())
2573
2574    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2575        if not self._match(TokenType.QUALIFY):
2576            return None
2577        return self.expression(exp.Qualify, this=self._parse_conjunction())
2578
2579    def _parse_order(
2580        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2581    ) -> t.Optional[exp.Expression]:
2582        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2583            return this
2584
2585        return self.expression(
2586            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2587        )
2588
2589    def _parse_sort(
2590        self, exp_class: t.Type[exp.Expression], *texts: str
2591    ) -> t.Optional[exp.Expression]:
2592        if not self._match_text_seq(*texts):
2593            return None
2594        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2595
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term: <expr> [ASC | DESC] [NULLS FIRST | NULLS LAST].

        When no explicit NULLS ordering is given, nulls_first is derived from the
        dialect's null_ordering setting so the output is canonical.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; just consume it
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer nulls_first only when the statement didn't say, based on whether
        # the dialect treats NULLs as small and on the sort direction
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2617
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT clause (or TOP, when top=True), or an ANSI FETCH clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)  # e.g. TOP (10)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST | NEXT} <count> [PERCENT] {ROW | ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive alternatives
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2656
2657    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2658        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2659            return this
2660
2661        count = self._parse_number()
2662        self._match_set((TokenType.ROW, TokenType.ROWS))
2663        return self.expression(exp.Offset, this=this, expression=count)
2664
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more row-locking clauses: FOR UPDATE, FOR SHARE or
        LOCK IN SHARE MODE, each with optional OF <tables>, NOWAIT, WAIT <n>,
        or SKIP LOCKED modifiers.
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                # Restrict the lock to specific tables
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three cases: True = NOWAIT, an expression = WAIT <n>,
            # False = SKIP LOCKED, None = no modifier
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2696
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing set operations (UNION / EXCEPT / INTERSECT) on `this`.

        Returns `this` unchanged when no set-operation token follows.
        """
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is explicitly given
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            # Recurse so chains like a UNION b UNION c nest to the right
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
2716
2717    def _parse_expression(self) -> t.Optional[exp.Expression]:
2718        return self._parse_alias(self._parse_conjunction())
2719
2720    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2721        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2722
2723    def _parse_equality(self) -> t.Optional[exp.Expression]:
2724        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2725
2726    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2727        return self._parse_tokens(self._parse_range, self.COMPARISON)
2728
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: BETWEEN/IN/LIKE etc. (via RANGE_PARSERS),
        ISNULL/NOTNULL shorthands, and trailing IS predicates, with optional NOT.
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dispatched parser declined; keep the plain expression
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is modeled as NOT (x IS NULL)
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2755
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (the IS token was already consumed)."""
        index = self._index - 1  # position of the IS token, for rewinding
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM is a null-safe (in)equality comparison
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not IS [NOT] NULL/TRUE/FALSE -- rewind so IS can be re-parsed elsewhere
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2770
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of an IN predicate: UNNEST(...), a subquery,
        a parenthesized expression list, or a bare field.

        Args:
            alias: whether the listed expressions may themselves carry aliases.
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery becomes the "query" arg; anything else is a value list
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            # Unparenthesized form: IN <field>
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2788
2789    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2790        low = self._parse_bitwise()
2791        self._match(TokenType.AND)
2792        high = self._parse_bitwise()
2793        return self.expression(exp.Between, this=this, low=low, high=high)
2794
2795    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2796        if not self._match(TokenType.ESCAPE):
2797            return this
2798        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2799
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing toward INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. INTERVAL '5 day' -> INTERVAL '5' day
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
2824
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a chain of bitwise binary operators, including << and >> shifts
        (tokenized as LT LT / GT GT pairs).
        """
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2847
2848    def _parse_term(self) -> t.Optional[exp.Expression]:
2849        return self._parse_tokens(self._parse_factor, self.TERM)
2850
2851    def _parse_factor(self) -> t.Optional[exp.Expression]:
2852        return self._parse_tokens(self._parse_unary, self.FACTOR)
2853
2854    def _parse_unary(self) -> t.Optional[exp.Expression]:
2855        if self._match_set(self.UNARY_PARSERS):
2856            return self.UNARY_PARSERS[self._prev.token_type](self)
2857        return self._parse_at_time_zone(self._parse_type())
2858
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL, a cast-like "TYPE <literal>" expression, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' casts the literal to the type
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name with no literal after it -- re-parse as a column
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
2880
2881    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2882        this = self._parse_type()
2883        if not this:
2884            return None
2885
2886        return self.expression(
2887            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2888        )
2889
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, e.g. INT, DECIMAL(10, 2), ARRAY<INT>, INT[],
        TIMESTAMP WITH TIME ZONE, or INTERVAL <unit>.

        Args:
            check_func: when True, a parenthesized type like DATE(...) is only
                accepted as a type if a string literal follows it; otherwise it
                is assumed to be a function call and None is returned.

        Returns:
            The parsed type expression, or None (with the parser rewound) when
            the tokens do not form a type.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema prefix
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) is ambiguous with a function call -- resolved below
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffixes: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this wasn't a type after all (e.g. a subscript)
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types: ARRAY<INT>, STRUCT<a INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize TIME/TIMESTAMP [WITH [LOCAL] TIME ZONE | WITHOUT TIME ZONE]
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a time-zone suffix was matched, it can no longer be a function call
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows TYPE(...), so assume a function call
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
3003
3004    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3005        this = self._parse_type() or self._parse_id_var()
3006        self._match(TokenType.COLON)
3007        return self._parse_column_def(this)
3008
3009    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3010        if not self._match_text_seq("AT", "TIME", "ZONE"):
3011            return this
3012        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3013
3014    def _parse_column(self) -> t.Optional[exp.Expression]:
3015        this = self._parse_field()
3016        if isinstance(this, exp.Identifier):
3017            this = self.expression(exp.Column, this=this)
3018        elif not this:
3019            return self._parse_bracket(this)
3020        return self._parse_column_ops(this)
3021
    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Apply postfix column operators to `this`: brackets, dot access,
        ::<type> casts, and any dialect-specific COLUMN_OPERATORS.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # e.g. x::int requires a type on the right-hand side
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a mapped handler take the next token as a literal
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the name parts: table.column becomes db.table.column, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
3068
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a number like .5, or a
        parenthesized expression / tuple / subquery. Returns None if no
        primary is found.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers written without a leading zero, e.g. .5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) is a tuple
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3112
3113    def _parse_field(
3114        self,
3115        any_token: bool = False,
3116        tokens: t.Optional[t.Collection[TokenType]] = None,
3117    ) -> t.Optional[exp.Expression]:
3118        return (
3119            self._parse_primary()
3120            or self._parse_function()
3121            or self._parse_id_var(any_token=any_token, tokens=tokens)
3122        )
3123
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, skip known-function builders and always produce
                an exp.Anonymous node.

        Returns:
            The parsed function expression (possibly wrapped in a window spec by
            _parse_window), or None when the current tokens don't start a call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Keyword "functions" with custom parsers that take no parentheses
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) wraps the subquery in its predicate
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved as-is in an Anonymous node
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3177
3178    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3179        return self._parse_column_def(self._parse_id_var())
3180
3181    def _parse_user_defined_function(
3182        self, kind: t.Optional[TokenType] = None
3183    ) -> t.Optional[exp.Expression]:
3184        this = self._parse_id_var()
3185
3186        while self._match(TokenType.DOT):
3187            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3188
3189        if not self._match(TokenType.L_PAREN):
3190            return this
3191
3192        expressions = self._parse_csv(self._parse_function_parameter)
3193        self._match_r_paren()
3194        return self.expression(
3195            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3196        )
3197
3198    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3199        literal = self._parse_primary()
3200        if literal:
3201            return self.expression(exp.Introducer, this=token.text, expression=literal)
3202
3203        return self.expression(exp.Identifier, this=token.text)
3204
3205    def _parse_session_parameter(self) -> exp.Expression:
3206        kind = None
3207        this = self._parse_id_var() or self._parse_primary()
3208
3209        if this and self._match(TokenType.DOT):
3210            kind = this.name
3211            this = self._parse_var() or self._parse_primary()
3212
3213        return self.expression(exp.SessionParameter, this=this, kind=kind)
3214
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `x -> x + 1`) or, failing that, fall back to a
        regular (possibly DISTINCT) expression argument.

        Args:
            alias: whether the fallback expression may carry an alias.
        """
        index = self._index

        # Tentatively parse `(a, b, ...)` or a single identifier as the
        # lambda's parameter list; rewind if the parens don't close.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        # It's only a lambda if an arrow-like token follows the parameter list.
        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as an ordinary argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                # Treat `name = value` arguments as named parameters (Var), not columns.
                if isinstance(left, exp.Column):
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3246
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column definitions / constraints) for `this`.

        If what follows parses as a nested SELECT instead, `this` is returned
        unchanged so the caller can parse the query itself.
        """
        index = self._index

        # Probe for a nested SELECT. The finally block restores the token
        # position and discards probe errors on every path, including success.
        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        # Each schema entry is either a constraint or a column definition.
        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3269
3270    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3271        # column defs are not really columns, they're identifiers
3272        if isinstance(this, exp.Column):
3273            this = this.this
3274        kind = self._parse_types()
3275
3276        if self._match_text_seq("FOR", "ORDINALITY"):
3277            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3278
3279        constraints = []
3280        while True:
3281            constraint = self._parse_column_constraint()
3282            if not constraint:
3283                break
3284            constraints.append(constraint)
3285
3286        if not kind and not constraints:
3287            return this
3288
3289        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3290
3291    def _parse_auto_increment(self) -> exp.Expression:
3292        start = None
3293        increment = None
3294
3295        if self._match(TokenType.L_PAREN, advance=False):
3296            args = self._parse_wrapped_csv(self._parse_bitwise)
3297            start = seq_get(args, 0)
3298            increment = seq_get(args, 1)
3299        elif self._match_text_seq("START"):
3300            start = self._parse_bitwise()
3301            self._match_text_seq("INCREMENT")
3302            increment = self._parse_bitwise()
3303
3304        if start and increment:
3305            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3306
3307        return exp.AutoIncrementColumnConstraint()
3308
3309    def _parse_compress(self) -> exp.Expression:
3310        if self._match(TokenType.L_PAREN, advance=False):
3311            return self.expression(
3312                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3313            )
3314
3315        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3316
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse a GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY column constraint,
        including its optional parenthesized sequence options.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            # GENERATED BY DEFAULT [ON NULL] AS IDENTITY
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options: START WITH, INCREMENT BY, MINVALUE, MAXVALUE, [NO] CYCLE.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Without IDENTITY this is a computed column: GENERATED ... AS (<expr>).
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3351
3352    def _parse_inline(self) -> t.Optional[exp.Expression]:
3353        self._match_text_seq("LENGTH")
3354        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3355
3356    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3357        if self._match_text_seq("NULL"):
3358            return self.expression(exp.NotNullColumnConstraint)
3359        if self._match_text_seq("CASESPECIFIC"):
3360            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3361        return None
3362
3363    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3364        if self._match(TokenType.CONSTRAINT):
3365            this = self._parse_id_var()
3366        else:
3367            this = None
3368
3369        if self._match_texts(self.CONSTRAINT_PARSERS):
3370            return self.expression(
3371                exp.ColumnConstraint,
3372                this=this,
3373                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3374            )
3375
3376        return this
3377
3378    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3379        if not self._match(TokenType.CONSTRAINT):
3380            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3381
3382        this = self._parse_id_var()
3383        expressions = []
3384
3385        while True:
3386            constraint = self._parse_unnamed_constraint() or self._parse_function()
3387            if not constraint:
3388                break
3389            expressions.append(constraint)
3390
3391        return self.expression(exp.Constraint, this=this, expressions=expressions)
3392
3393    def _parse_unnamed_constraint(
3394        self, constraints: t.Optional[t.Collection[str]] = None
3395    ) -> t.Optional[exp.Expression]:
3396        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3397            return None
3398
3399        constraint = self._prev.text.upper()
3400        if constraint not in self.CONSTRAINT_PARSERS:
3401            self.raise_error(f"No parser found for schema constraint {constraint}.")
3402
3403        return self.CONSTRAINT_PARSERS[constraint](self)
3404
3405    def _parse_unique(self) -> exp.Expression:
3406        self._match_text_seq("KEY")
3407        return self.expression(
3408            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3409        )
3410
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        DEFERRABLE, MATCH FULL, etc.) as plain strings, stopping at the first
        token that isn't a known option.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3447
3448    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3449        if match and not self._match(TokenType.REFERENCES):
3450            return None
3451
3452        expressions = None
3453        this = self._parse_id_var()
3454
3455        if self._match(TokenType.L_PAREN, advance=False):
3456            expressions = self._parse_wrapped_id_vars()
3457
3458        options = self._parse_key_constraint_options()
3459        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3460
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: the local column list, the REFERENCES
        clause, and any ON DELETE / ON UPDATE actions.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps the event ("delete"/"update") to its action string.
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE or RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3486
3487    def _parse_primary_key(self) -> exp.Expression:
3488        desc = (
3489            self._match_set((TokenType.ASC, TokenType.DESC))
3490            and self._prev.token_type == TokenType.DESC
3491        )
3492
3493        if not self._match(TokenType.L_PAREN, advance=False):
3494            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3495
3496        expressions = self._parse_wrapped_csv(self._parse_field)
3497        options = self._parse_key_constraint_options()
3498        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3499
    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse a bracketed suffix on `this`: subscripts `x[...]`, array
        literals `[...]`, or brace struct literals `{...}` (DuckDB). Recurses
        to consume chained brackets like `x[0][1]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon is an open-start slice, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices for the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3536
3537    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3538        if self._match(TokenType.COLON):
3539            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3540        return this
3541
3542    def _parse_case(self) -> t.Optional[exp.Expression]:
3543        ifs = []
3544        default = None
3545
3546        expression = self._parse_conjunction()
3547
3548        while self._match(TokenType.WHEN):
3549            this = self._parse_conjunction()
3550            self._match(TokenType.THEN)
3551            then = self._parse_conjunction()
3552            ifs.append(self.expression(exp.If, this=this, true=then))
3553
3554        if self._match(TokenType.ELSE):
3555            default = self._parse_conjunction()
3556
3557        if not self._match(TokenType.END):
3558            self.raise_error("Expected END after CASE", self._prev)
3559
3560        return self._parse_window(
3561            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3562        )
3563
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call `IF(cond, true[, false])` or as a
        statement-style `IF cond THEN ... [ELSE ...] END`.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # Rewind target includes the IF token itself, which is already consumed.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3585
3586    def _parse_extract(self) -> exp.Expression:
3587        this = self._parse_function() or self._parse_var() or self._parse_type()
3588
3589        if self._match(TokenType.FROM):
3590            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3591
3592        if not self._match(TokenType.COMMA):
3593            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3594
3595        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3596
3597    def _parse_cast(self, strict: bool) -> exp.Expression:
3598        this = self._parse_conjunction()
3599
3600        if not self._match(TokenType.ALIAS):
3601            if self._match(TokenType.COMMA):
3602                return self.expression(
3603                    exp.CastToStrType, this=this, expression=self._parse_string()
3604                )
3605            else:
3606                self.raise_error("Expected AS after CAST")
3607
3608        to = self._parse_types()
3609
3610        if not to:
3611            self.raise_error("Expected TYPE after CAST")
3612        elif to.this == exp.DataType.Type.CHAR:
3613            if self._match(TokenType.CHARACTER_SET):
3614                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3615
3616        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3617
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into a GroupConcat node,
        normalizing the different ORDER BY and WITHIN GROUP placements across
        dialects.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3646
3647    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3648        to: t.Optional[exp.Expression]
3649        this = self._parse_bitwise()
3650
3651        if self._match(TokenType.USING):
3652            to = self.expression(exp.CharacterSet, this=self._parse_var())
3653        elif self._match(TokenType.COMMA):
3654            to = self._parse_bitwise()
3655        else:
3656            to = None
3657
3658        # Swap the argument order if needed to produce the correct AST
3659        if self.CONVERT_TYPE_FIRST:
3660            this, to = to, this
3661
3662        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3663
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than 3 arguments: the charset variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired one is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never compares equal, so use IS NULL instead.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may be NULL at runtime, so also
                # treat "both sides are NULL" as a match.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3710
3711    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3712        self._match_text_seq("KEY")
3713        key = self._parse_field()
3714        self._match(TokenType.COLON)
3715        self._match_text_seq("VALUE")
3716        value = self._parse_field()
3717        if not key and not value:
3718            return None
3719        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3720
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs followed by the optional
        NULL handling, UNIQUE KEYS, RETURNING, FORMAT JSON, and ENCODING clauses.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        # Each of these is falsy when its clause is absent.
        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3751
3752    def _parse_logarithm(self) -> exp.Expression:
3753        # Default argument order is base, expression
3754        args = self._parse_csv(self._parse_range)
3755
3756        if len(args) > 1:
3757            if not self.LOG_BASE_FIRST:
3758                args.reverse()
3759            return exp.Log.from_arg_list(args)
3760
3761        return self.expression(
3762            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3763        )
3764
    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL's `MATCH (col, ...) AGAINST (expr [modifier])` full-text
        predicate, including the optional search-mode modifiers.
        """
        expressions = self._parse_csv(self._parse_column)

        # The MATCH column list's closing paren precedes AGAINST (.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3786
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL's OPENJSON(json [, path]) [WITH (column defs)] table function."""
        this = self._parse_bitwise()
        # `path` is falsy when no second argument is present.
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The closing paren of OPENJSON(...) immediately precedes the WITH clause.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3807
3808    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3809        args = self._parse_csv(self._parse_bitwise)
3810
3811        if self._match(TokenType.IN):
3812            return self.expression(
3813                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3814            )
3815
3816        if haystack_first:
3817            haystack = seq_get(args, 0)
3818            needle = seq_get(args, 1)
3819        else:
3820            needle = seq_get(args, 0)
3821            haystack = seq_get(args, 1)
3822
3823        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3824
3825        self.validate_expression(this, args)
3826
3827        return this
3828
3829    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3830        args = self._parse_csv(self._parse_table)
3831        return exp.JoinHint(this=func_name.upper(), expressions=args)
3832
3833    def _parse_substring(self) -> exp.Expression:
3834        # Postgres supports the form: substring(string [from int] [for int])
3835        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3836
3837        args = self._parse_csv(self._parse_bitwise)
3838
3839        if self._match(TokenType.FROM):
3840            args.append(self._parse_bitwise())
3841            if self._match(TokenType.FOR):
3842                args.append(self._parse_bitwise())
3843
3844        this = exp.Substring.from_arg_list(args)
3845        self.validate_expression(this, args)
3846
3847        return this
3848
3849    def _parse_trim(self) -> exp.Expression:
3850        # https://www.w3resource.com/sql/character-functions/trim.php
3851        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3852
3853        position = None
3854        collation = None
3855
3856        if self._match_texts(self.TRIM_TYPES):
3857            position = self._prev.text.upper()
3858
3859        expression = self._parse_bitwise()
3860        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3861            this = self._parse_bitwise()
3862        else:
3863            this = expression
3864            expression = None
3865
3866        if self._match(TokenType.COLLATE):
3867            collation = self._parse_bitwise()
3868
3869        return self.expression(
3870            exp.Trim,
3871            this=this,
3872            position=position,
3873            expression=expression,
3874            collation=collation,
3875        )
3876
3877    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3878        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3879
3880    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3881        return self._parse_window(self._parse_id_var(), alias=True)
3882
3883    def _parse_respect_or_ignore_nulls(
3884        self, this: t.Optional[exp.Expression]
3885    ) -> t.Optional[exp.Expression]:
3886        if self._match_text_seq("IGNORE", "NULLS"):
3887            return self.expression(exp.IgnoreNulls, this=this)
3888        if self._match_text_seq("RESPECT", "NULLS"):
3889            return self.expression(exp.RespectNulls, this=this)
3890        return this
3891
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call: FILTER,
        WITHIN GROUP, IGNORE/RESPECT NULLS, and an OVER window (inline spec,
        named window reference, or a BigQuery-style window alias when
        `alias` is True).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window to parse.
            return this
        else:
            over = self._prev.text.upper()

        # Without parentheses, OVER references a named window: OVER my_window.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # A frame kind (ROWS/RANGE) introduces a BETWEEN start AND end spec.
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3974
3975    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3976        self._match(TokenType.BETWEEN)
3977
3978        return {
3979            "value": (
3980                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3981                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
3982                or self._parse_bitwise()
3983            ),
3984            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
3985        }
3986
3987    def _parse_alias(
3988        self, this: t.Optional[exp.Expression], explicit: bool = False
3989    ) -> t.Optional[exp.Expression]:
3990        any_token = self._match(TokenType.ALIAS)
3991
3992        if explicit and not any_token:
3993            return this
3994
3995        if self._match(TokenType.L_PAREN):
3996            aliases = self.expression(
3997                exp.Aliases,
3998                this=this,
3999                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
4000            )
4001            self._match_r_paren(aliases)
4002            return aliases
4003
4004        alias = self._parse_id_var(any_token)
4005
4006        if alias:
4007            return self.expression(exp.Alias, this=this, alias=alias)
4008
4009        return this
4010
4011    def _parse_id_var(
4012        self,
4013        any_token: bool = True,
4014        tokens: t.Optional[t.Collection[TokenType]] = None,
4015        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
4016    ) -> t.Optional[exp.Expression]:
4017        identifier = self._parse_identifier()
4018
4019        if identifier:
4020            return identifier
4021
4022        prefix = ""
4023
4024        if prefix_tokens:
4025            while self._match_set(prefix_tokens):
4026                prefix += self._prev.text
4027
4028        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4029            quoted = self._prev.token_type == TokenType.STRING
4030            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
4031
4032        return None
4033
4034    def _parse_string(self) -> t.Optional[exp.Expression]:
4035        if self._match(TokenType.STRING):
4036            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4037        return self._parse_placeholder()
4038
4039    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4040        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4041
4042    def _parse_number(self) -> t.Optional[exp.Expression]:
4043        if self._match(TokenType.NUMBER):
4044            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4045        return self._parse_placeholder()
4046
4047    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4048        if self._match(TokenType.IDENTIFIER):
4049            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4050        return self._parse_placeholder()
4051
4052    def _parse_var(
4053        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4054    ) -> t.Optional[exp.Expression]:
4055        if (
4056            (any_token and self._advance_any())
4057            or self._match(TokenType.VAR)
4058            or (self._match_set(tokens) if tokens else False)
4059        ):
4060            return self.expression(exp.Var, this=self._prev.text)
4061        return self._parse_placeholder()
4062
4063    def _advance_any(self) -> t.Optional[Token]:
4064        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4065            self._advance()
4066            return self._prev
4067        return None
4068
    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, or failing that, a string literal."""
        return self._parse_var() or self._parse_string()
4071
4072    def _parse_null(self) -> t.Optional[exp.Expression]:
4073        if self._match(TokenType.NULL):
4074            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4075        return None
4076
4077    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4078        if self._match(TokenType.TRUE):
4079            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4080        if self._match(TokenType.FALSE):
4081            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4082        return None
4083
4084    def _parse_star(self) -> t.Optional[exp.Expression]:
4085        if self._match(TokenType.STAR):
4086            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4087        return None
4088
    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (e.g. @{name})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        # The closing brace is optional on purpose: `wrapped` records whether braces were used.
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4094
4095    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4096        if self._match_set(self.PLACEHOLDER_PARSERS):
4097            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4098            if placeholder:
4099                return placeholder
4100            self._advance(-1)
4101        return None
4102
4103    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4104        if not self._match(TokenType.EXCEPT):
4105            return None
4106        if self._match(TokenType.L_PAREN, advance=False):
4107            return self._parse_wrapped_csv(self._parse_column)
4108        return self._parse_csv(self._parse_column)
4109
4110    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4111        if not self._match(TokenType.REPLACE):
4112            return None
4113        if self._match(TokenType.L_PAREN, advance=False):
4114            return self._parse_wrapped_csv(self._parse_expression)
4115        return self._parse_csv(self._parse_expression)
4116
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`.

        Items that parse to None are dropped from the result. Any comments
        pending when a separator is consumed are attached to the item that
        preceded it (via `_add_comments`).
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's pending comments to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
4130
4131    def _parse_tokens(
4132        self, parse_method: t.Callable, expressions: t.Dict
4133    ) -> t.Optional[exp.Expression]:
4134        this = parse_method()
4135
4136        while self._match_set(expressions):
4137            this = self.expression(
4138                expressions[self._prev.token_type],
4139                this=this,
4140                comments=self._prev_comments,
4141                expression=parse_method(),
4142            )
4143
4144        return this
4145
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated identifier list in parentheses (parens optional if `optional`)."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4148
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list wrapped in parentheses (parens optional if `optional`)."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
4155
4156    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4157        wrapped = self._match(TokenType.L_PAREN)
4158        if not wrapped and not optional:
4159            self.raise_error("Expecting (")
4160        parse_result = parse_method()
4161        if wrapped:
4162            self._match_r_paren()
4163        return parse_result
4164
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT, or else a (possibly set-operation) expression/conjunction."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
4169
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT (with set operations and query modifiers) embedded in a DDL statement."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )
4174
4175    def _parse_transaction(self) -> exp.Expression:
4176        this = None
4177        if self._match_texts(self.TRANSACTION_KIND):
4178            this = self._prev.text
4179
4180        self._match_texts({"TRANSACTION", "WORK"})
4181
4182        modes = []
4183        while True:
4184            mode = []
4185            while self._match(TokenType.VAR):
4186                mode.append(self._prev.text)
4187
4188            if mode:
4189                modes.append(" ".join(mode))
4190            if not self._match(TokenType.COMMA):
4191                break
4192
4193        return self.expression(exp.Transaction, this=this, modes=modes)
4194
4195    def _parse_commit_or_rollback(self) -> exp.Expression:
4196        chain = None
4197        savepoint = None
4198        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4199
4200        self._match_texts({"TRANSACTION", "WORK"})
4201
4202        if self._match_text_seq("TO"):
4203            self._match_text_seq("SAVEPOINT")
4204            savepoint = self._parse_id_var()
4205
4206        if self._match(TokenType.AND):
4207            chain = not self._match_text_seq("NO")
4208            self._match_text_seq("CHAIN")
4209
4210        if is_rollback:
4211            return self.expression(exp.Rollback, savepoint=savepoint)
4212        return self.expression(exp.Commit, chain=chain)
4213
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col] action."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4234
4235    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4236        drop = self._match(TokenType.DROP) and self._parse_drop()
4237        if drop and not isinstance(drop, exp.Command):
4238            drop.set("kind", drop.args.get("kind", "COLUMN"))
4239        return drop
4240
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse one or more PARTITION specs for ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
4246
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The constraint-kind token was already consumed by the caller, so
        `self._prev` determines which form is being parsed.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4270
4271    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4272        index = self._index - 1
4273
4274        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4275            return self._parse_csv(self._parse_add_constraint)
4276
4277        self._retreat(index)
4278        return self._parse_csv(self._parse_add_column)
4279
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse an ALTER [COLUMN] action: DROP/SET DEFAULT, or a type change."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <type> [COLLATE <collation>] [USING <expr>]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4297
4298    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4299        index = self._index - 1
4300
4301        partition_exists = self._parse_exists()
4302        if self._match(TokenType.PARTITION, advance=False):
4303            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4304
4305        self._retreat(index)
4306        return self._parse_csv(self._parse_drop_column)
4307
    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse a RENAME TO <table> action."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4311
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement; anything unrecognized becomes a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The consumed keyword (e.g. ADD, DROP, ALTER, RENAME) selects the action parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node when all tokens were consumed;
            # otherwise fall through and re-emit the statement as an opaque Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4336
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... statement and its WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # Optional BY TARGET / BY SOURCE qualifier; `source` is False for
            # BY TARGET, True for BY SOURCE, and False when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # THEN INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # THEN INSERT (cols) VALUES (exprs)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # THEN UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # THEN UPDATE SET col = expr, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4402
4403    def _parse_show(self) -> t.Optional[exp.Expression]:
4404        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4405        if parser:
4406            return parser(self)
4407        self._advance()
4408        return self.expression(exp.Show, this=self._prev.text.upper())
4409
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET assignment (`name = value` or `name TO value`) into a SetItem.

        Returns None (with the cursor rewound) when no assignment operator follows.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment after all: rewind to where we started.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4436
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET TRANSACTION and its comma-separated characteristics."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via **kwargs.
            **{"global": global_},  # type: ignore
        )
4448
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item via a registered parser, else as a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4452
4453    def _parse_set(self) -> exp.Expression:
4454        index = self._index
4455        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4456
4457        if self._curr:
4458            self._retreat(index)
4459            return self._parse_as_command(self._prev)
4460
4461        return set_
4462
4463    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4464        for option in options:
4465            if self._match_text_seq(*option.split(" ")):
4466                return exp.Var(this=option)
4467        return None
4468
4469    def _parse_as_command(self, start: Token) -> exp.Command:
4470        while self._curr:
4471            self._advance()
4472        text = self._find_sql(start, self._prev)
4473        size = len(start.text)
4474        return exp.Command(this=text[:size], expression=text[size:])
4475
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the upcoming keyword sequence.

        Walks `trie` token by token; on a complete key match, returns the
        matching callable from `parsers`. Otherwise rewinds the cursor and
        returns None.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # The sequence so far is not a prefix of any key: give up.
                break
            if result == 2:
                # Complete key match: the joined sequence is a registered parser key.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4498
4499    def _match(self, token_type, advance=True, expression=None):
4500        if not self._curr:
4501            return None
4502
4503        if self._curr.token_type == token_type:
4504            if advance:
4505                self._advance()
4506            self._add_comments(expression)
4507            return True
4508
4509        return None
4510
4511    def _match_set(self, types, advance=True):
4512        if not self._curr:
4513            return None
4514
4515        if self._curr.token_type in types:
4516            if advance:
4517                self._advance()
4518            return True
4519
4520        return None
4521
4522    def _match_pair(self, token_type_a, token_type_b, advance=True):
4523        if not self._curr or not self._next:
4524            return None
4525
4526        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4527            if advance:
4528                self._advance(2)
4529            return True
4530
4531        return None
4532
4533    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4534        if not self._match(TokenType.L_PAREN, expression=expression):
4535            self.raise_error("Expecting (")
4536
4537    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4538        if not self._match(TokenType.R_PAREN, expression=expression):
4539            self.raise_error("Expecting )")
4540
4541    def _match_texts(self, texts, advance=True):
4542        if self._curr and self._curr.text.upper() in texts:
4543            if advance:
4544                self._advance()
4545            return True
4546        return False
4547
4548    def _match_text_seq(self, *texts, advance=True):
4549        index = self._index
4550        for text in texts:
4551            if self._curr and self._curr.text.upper() == text:
4552                self._advance()
4553            else:
4554                self._retreat(index)
4555                return False
4556
4557        if not advance:
4558            self._retreat(index)
4559
4560        return True
4561
    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var equivalents."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A table-qualified column becomes a Dot(table, column); an
            # unqualified one becomes a bare Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this
4587
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace column references to lambda parameters with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column sits inside a Dot chain, replace the outermost Dot.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not nested under a Dot: replace the column itself, or the
                    # whole node if the column IS the node.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
786    def __init__(
787        self,
788        error_level: t.Optional[ErrorLevel] = None,
789        error_message_context: int = 100,
790        index_offset: int = 0,
791        unnest_column_only: bool = False,
792        alias_post_tablesample: bool = False,
793        max_errors: int = 3,
794        null_ordering: t.Optional[str] = None,
795    ):
796        self.error_level = error_level or ErrorLevel.IMMEDIATE
797        self.error_message_context = error_message_context
798        self.index_offset = index_offset
799        self.unnest_column_only = unnest_column_only
800        self.alias_post_tablesample = alias_post_tablesample
801        self.max_errors = max_errors
802        self.null_ordering = null_ordering
803        self.reset()
def reset(self):
805    def reset(self):
806        self.sql = ""
807        self.errors = []
808        self._tokens = []
809        self._index = 0
810        self._curr = None
811        self._next = None
812        self._prev = None
813        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
815    def parse(
816        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
817    ) -> t.List[t.Optional[exp.Expression]]:
818        """
819        Parses a list of tokens and returns a list of syntax trees, one tree
820        per parsed SQL statement.
821
822        Args:
823            raw_tokens: the list of tokens.
824            sql: the original SQL string, used to produce helpful debug messages.
825
826        Returns:
827            The list of syntax trees.
828        """
829        return self._parse(
830            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
831        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
833    def parse_into(
834        self,
835        expression_types: exp.IntoType,
836        raw_tokens: t.List[Token],
837        sql: t.Optional[str] = None,
838    ) -> t.List[t.Optional[exp.Expression]]:
839        """
840        Parses a list of tokens into a given Expression type. If a collection of Expression
841        types is given instead, this method will try to parse the token list into each one
842        of them, stopping at the first for which the parsing succeeds.
843
844        Args:
845            expression_types: the expression type(s) to try and parse the token list into.
846            raw_tokens: the list of tokens.
847            sql: the original SQL string, used to produce helpful debug messages.
848
849        Returns:
850            The target Expression.
851        """
852        errors = []
853        for expression_type in ensure_collection(expression_types):
854            parser = self.EXPRESSION_PARSERS.get(expression_type)
855            if not parser:
856                raise TypeError(f"No parser registered for {expression_type}")
857            try:
858                return self._parse(parser, raw_tokens, sql)
859            except ParseError as e:
860                e.errors[0]["into_expression"] = expression_type
861                errors.append(e)
862        raise ParseError(
863            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
864            errors=merge_errors(errors),
865        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
901    def check_errors(self) -> None:
902        """
903        Logs or raises any found errors, depending on the chosen error level setting.
904        """
905        if self.error_level == ErrorLevel.WARN:
906            for error in self.errors:
907                logger.error(str(error))
908        elif self.error_level == ErrorLevel.RAISE and self.errors:
909            raise ParseError(
910                concat_messages(self.errors, self.max_errors),
911                errors=merge_errors(self.errors),
912            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the
    chosen error level setting.

    Args:
        message: the error description.
        token: the token the error is anchored to; falls back to the current token,
            then the previous one, then an empty string token.
    """
    # Pick the best available anchor token for line/column information.
    anchor = token or self._curr or self._prev or Token.string("")
    start = anchor.start
    end = anchor.end + 1

    # Slice the original SQL around the offending span to build a readable snippet.
    context_before = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    context_after = self.sql[end : end + self.error_message_context]

    # The highlighted span is underlined with ANSI escape codes.
    error = ParseError.new(
        f"{message}. Line {anchor.line}, Col: {anchor.col}.\n"
        f"  {context_before}\033[4m{highlight}\033[0m{context_after}",
        description=message,
        line=anchor.line,
        col=anchor.col,
        start_context=context_before,
        highlight=highlight,
        end_context=context_after,
    )

    # IMMEDIATE raises on the spot; otherwise errors accumulate for check_errors().
    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error
    self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression(self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: the expression class to instantiate.
        comments: an optional list of comments to attach to the expression.
        kwargs: the arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    # Explicit comments win; otherwise pull pending comments from the parser state.
    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    self.validate_expression(instance)
    return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
def validate_expression(
    self, expression: exp.Expression, args: t.Optional[t.List] = None
) -> None:
    """
    Validates an already instantiated expression, making sure that all its mandatory
    arguments are set.

    Args:
        expression: the expression to validate.
        args: an optional list of items that was used to instantiate the expression, if it's a Func.
    """
    # Validation is a no-op when errors are being ignored.
    if self.error_level != ErrorLevel.IGNORE:
        for message in expression.error_messages(args):
            self.raise_error(message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.