Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
  13if t.TYPE_CHECKING:
  14    from sqlglot._typing import E
  15
  16logger = logging.getLogger("sqlglot")
  17
  18
  19def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
  20    if len(args) == 1 and args[0].is_star:
  21        return exp.StarMap(this=args[0])
  22
  23    keys = []
  24    values = []
  25    for i in range(0, len(args), 2):
  26        keys.append(args[i])
  27        values.append(args[i + 1])
  28    return exp.VarMap(
  29        keys=exp.Array(expressions=keys),
  30        values=exp.Array(expressions=values),
  31    )
  32
  33
  34def parse_like(args: t.List) -> exp.Expression:
  35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  37
  38
  39def binary_range_parser(
  40    expr_type: t.Type[exp.Expression],
  41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  42    return lambda self, this: self._parse_escape(
  43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  44    )
  45
  46
class _Parser(type):
    """Metaclass for Parser subclasses.

    At class-creation time it precomputes token tries from the keys of
    SHOW_PARSERS and SET_PARSERS so multi-word commands (e.g. "ORDER BY")
    can be matched token by token during parsing.
    """

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        # Keys are split on spaces so each trie level corresponds to one word.
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
  54
  55
  56class Parser(metaclass=_Parser):
  57    """
  58    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  59    a parsed syntax tree.
  60
  61    Args:
  62        error_level: the desired error level.
  63            Default: ErrorLevel.IMMEDIATE
  64        error_message_context: determines the amount of context to capture from a
  65            query string when displaying the error message (in number of characters).
  66            Default: 50.
  67        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  68            Default: 0
  69        alias_post_tablesample: If the table alias comes after tablesample.
  70            Default: False
  71        max_errors: Maximum number of error messages to include in a raised ParseError.
  72            This is only relevant if error_level is ErrorLevel.RAISE.
  73            Default: 3
  74        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  75            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  76            Default: "nulls_are_small"
  77    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.INT4RANGE,
 159        TokenType.INT4MULTIRANGE,
 160        TokenType.INT8RANGE,
 161        TokenType.INT8MULTIRANGE,
 162        TokenType.NUMRANGE,
 163        TokenType.NUMMULTIRANGE,
 164        TokenType.TSRANGE,
 165        TokenType.TSMULTIRANGE,
 166        TokenType.TSTZRANGE,
 167        TokenType.TSTZMULTIRANGE,
 168        TokenType.DATERANGE,
 169        TokenType.DATEMULTIRANGE,
 170        TokenType.DECIMAL,
 171        TokenType.BIGDECIMAL,
 172        TokenType.UUID,
 173        TokenType.GEOGRAPHY,
 174        TokenType.GEOMETRY,
 175        TokenType.HLLSKETCH,
 176        TokenType.HSTORE,
 177        TokenType.PSEUDO_TYPE,
 178        TokenType.SUPER,
 179        TokenType.SERIAL,
 180        TokenType.SMALLSERIAL,
 181        TokenType.BIGSERIAL,
 182        TokenType.XML,
 183        TokenType.UNIQUEIDENTIFIER,
 184        TokenType.MONEY,
 185        TokenType.SMALLMONEY,
 186        TokenType.ROWVERSION,
 187        TokenType.IMAGE,
 188        TokenType.VARIANT,
 189        TokenType.OBJECT,
 190        TokenType.INET,
 191        *NESTED_TYPE_TOKENS,
 192    }
 193
 194    SUBQUERY_PREDICATES = {
 195        TokenType.ANY: exp.Any,
 196        TokenType.ALL: exp.All,
 197        TokenType.EXISTS: exp.Exists,
 198        TokenType.SOME: exp.Any,
 199    }
 200
 201    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 202
 203    DB_CREATABLES = {
 204        TokenType.DATABASE,
 205        TokenType.SCHEMA,
 206        TokenType.TABLE,
 207        TokenType.VIEW,
 208        TokenType.DICTIONARY,
 209    }
 210
 211    CREATABLES = {
 212        TokenType.COLUMN,
 213        TokenType.FUNCTION,
 214        TokenType.INDEX,
 215        TokenType.PROCEDURE,
 216        *DB_CREATABLES,
 217    }
 218
 219    ID_VAR_TOKENS = {
 220        TokenType.VAR,
 221        TokenType.ANTI,
 222        TokenType.APPLY,
 223        TokenType.ASC,
 224        TokenType.AUTO_INCREMENT,
 225        TokenType.BEGIN,
 226        TokenType.CACHE,
 227        TokenType.COLLATE,
 228        TokenType.COMMAND,
 229        TokenType.COMMENT,
 230        TokenType.COMMIT,
 231        TokenType.CONSTRAINT,
 232        TokenType.DEFAULT,
 233        TokenType.DELETE,
 234        TokenType.DESC,
 235        TokenType.DESCRIBE,
 236        TokenType.DICTIONARY,
 237        TokenType.DIV,
 238        TokenType.END,
 239        TokenType.EXECUTE,
 240        TokenType.ESCAPE,
 241        TokenType.FALSE,
 242        TokenType.FIRST,
 243        TokenType.FILTER,
 244        TokenType.FORMAT,
 245        TokenType.FULL,
 246        TokenType.IF,
 247        TokenType.IS,
 248        TokenType.ISNULL,
 249        TokenType.INTERVAL,
 250        TokenType.KEEP,
 251        TokenType.LEFT,
 252        TokenType.LOAD,
 253        TokenType.MERGE,
 254        TokenType.NATURAL,
 255        TokenType.NEXT,
 256        TokenType.OFFSET,
 257        TokenType.ORDINALITY,
 258        TokenType.OVERWRITE,
 259        TokenType.PARTITION,
 260        TokenType.PERCENT,
 261        TokenType.PIVOT,
 262        TokenType.PRAGMA,
 263        TokenType.RANGE,
 264        TokenType.REFERENCES,
 265        TokenType.RIGHT,
 266        TokenType.ROW,
 267        TokenType.ROWS,
 268        TokenType.SEMI,
 269        TokenType.SET,
 270        TokenType.SETTINGS,
 271        TokenType.SHOW,
 272        TokenType.TEMPORARY,
 273        TokenType.TOP,
 274        TokenType.TRUE,
 275        TokenType.UNIQUE,
 276        TokenType.UNPIVOT,
 277        TokenType.VOLATILE,
 278        TokenType.WINDOW,
 279        *CREATABLES,
 280        *SUBQUERY_PREDICATES,
 281        *TYPE_TOKENS,
 282        *NO_PAREN_FUNCTIONS,
 283    }
 284
 285    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 286
 287    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 288        TokenType.APPLY,
 289        TokenType.ASOF,
 290        TokenType.FULL,
 291        TokenType.LEFT,
 292        TokenType.LOCK,
 293        TokenType.NATURAL,
 294        TokenType.OFFSET,
 295        TokenType.RIGHT,
 296        TokenType.WINDOW,
 297    }
 298
 299    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 300
 301    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 302
 303    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 304
 305    FUNC_TOKENS = {
 306        TokenType.COMMAND,
 307        TokenType.CURRENT_DATE,
 308        TokenType.CURRENT_DATETIME,
 309        TokenType.CURRENT_TIMESTAMP,
 310        TokenType.CURRENT_TIME,
 311        TokenType.CURRENT_USER,
 312        TokenType.FILTER,
 313        TokenType.FIRST,
 314        TokenType.FORMAT,
 315        TokenType.GLOB,
 316        TokenType.IDENTIFIER,
 317        TokenType.INDEX,
 318        TokenType.ISNULL,
 319        TokenType.ILIKE,
 320        TokenType.LIKE,
 321        TokenType.MERGE,
 322        TokenType.OFFSET,
 323        TokenType.PRIMARY_KEY,
 324        TokenType.RANGE,
 325        TokenType.REPLACE,
 326        TokenType.ROW,
 327        TokenType.UNNEST,
 328        TokenType.VAR,
 329        TokenType.LEFT,
 330        TokenType.RIGHT,
 331        TokenType.DATE,
 332        TokenType.DATETIME,
 333        TokenType.TABLE,
 334        TokenType.TIMESTAMP,
 335        TokenType.TIMESTAMPTZ,
 336        TokenType.WINDOW,
 337        *TYPE_TOKENS,
 338        *SUBQUERY_PREDICATES,
 339    }
 340
 341    CONJUNCTION = {
 342        TokenType.AND: exp.And,
 343        TokenType.OR: exp.Or,
 344    }
 345
 346    EQUALITY = {
 347        TokenType.EQ: exp.EQ,
 348        TokenType.NEQ: exp.NEQ,
 349        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 350    }
 351
 352    COMPARISON = {
 353        TokenType.GT: exp.GT,
 354        TokenType.GTE: exp.GTE,
 355        TokenType.LT: exp.LT,
 356        TokenType.LTE: exp.LTE,
 357    }
 358
 359    BITWISE = {
 360        TokenType.AMP: exp.BitwiseAnd,
 361        TokenType.CARET: exp.BitwiseXor,
 362        TokenType.PIPE: exp.BitwiseOr,
 363        TokenType.DPIPE: exp.DPipe,
 364    }
 365
 366    TERM = {
 367        TokenType.DASH: exp.Sub,
 368        TokenType.PLUS: exp.Add,
 369        TokenType.MOD: exp.Mod,
 370        TokenType.COLLATE: exp.Collate,
 371    }
 372
 373    FACTOR = {
 374        TokenType.DIV: exp.IntDiv,
 375        TokenType.LR_ARROW: exp.Distance,
 376        TokenType.SLASH: exp.Div,
 377        TokenType.STAR: exp.Mul,
 378    }
 379
 380    TIMESTAMPS = {
 381        TokenType.TIME,
 382        TokenType.TIMESTAMP,
 383        TokenType.TIMESTAMPTZ,
 384        TokenType.TIMESTAMPLTZ,
 385    }
 386
 387    SET_OPERATIONS = {
 388        TokenType.UNION,
 389        TokenType.INTERSECT,
 390        TokenType.EXCEPT,
 391    }
 392
 393    JOIN_METHODS = {
 394        TokenType.NATURAL,
 395        TokenType.ASOF,
 396    }
 397
 398    JOIN_SIDES = {
 399        TokenType.LEFT,
 400        TokenType.RIGHT,
 401        TokenType.FULL,
 402    }
 403
 404    JOIN_KINDS = {
 405        TokenType.INNER,
 406        TokenType.OUTER,
 407        TokenType.CROSS,
 408        TokenType.SEMI,
 409        TokenType.ANTI,
 410    }
 411
 412    LAMBDAS = {
 413        TokenType.ARROW: lambda self, expressions: self.expression(
 414            exp.Lambda,
 415            this=self._replace_lambda(
 416                self._parse_conjunction(),
 417                {node.name for node in expressions},
 418            ),
 419            expressions=expressions,
 420        ),
 421        TokenType.FARROW: lambda self, expressions: self.expression(
 422            exp.Kwarg,
 423            this=exp.Var(this=expressions[0].name),
 424            expression=self._parse_conjunction(),
 425        ),
 426    }
 427
 428    COLUMN_OPERATORS = {
 429        TokenType.DOT: None,
 430        TokenType.DCOLON: lambda self, this, to: self.expression(
 431            exp.Cast if self.STRICT_CAST else exp.TryCast,
 432            this=this,
 433            to=to,
 434        ),
 435        TokenType.ARROW: lambda self, this, path: self.expression(
 436            exp.JSONExtract,
 437            this=this,
 438            expression=path,
 439        ),
 440        TokenType.DARROW: lambda self, this, path: self.expression(
 441            exp.JSONExtractScalar,
 442            this=this,
 443            expression=path,
 444        ),
 445        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 446            exp.JSONBExtract,
 447            this=this,
 448            expression=path,
 449        ),
 450        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 451            exp.JSONBExtractScalar,
 452            this=this,
 453            expression=path,
 454        ),
 455        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 456            exp.JSONBContains,
 457            this=this,
 458            expression=key,
 459        ),
 460    }
 461
 462    EXPRESSION_PARSERS = {
 463        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 464        exp.Column: lambda self: self._parse_column(),
 465        exp.Condition: lambda self: self._parse_conjunction(),
 466        exp.DataType: lambda self: self._parse_types(),
 467        exp.Expression: lambda self: self._parse_statement(),
 468        exp.From: lambda self: self._parse_from(),
 469        exp.Group: lambda self: self._parse_group(),
 470        exp.Having: lambda self: self._parse_having(),
 471        exp.Identifier: lambda self: self._parse_id_var(),
 472        exp.Join: lambda self: self._parse_join(),
 473        exp.Lambda: lambda self: self._parse_lambda(),
 474        exp.Lateral: lambda self: self._parse_lateral(),
 475        exp.Limit: lambda self: self._parse_limit(),
 476        exp.Offset: lambda self: self._parse_offset(),
 477        exp.Order: lambda self: self._parse_order(),
 478        exp.Ordered: lambda self: self._parse_ordered(),
 479        exp.Properties: lambda self: self._parse_properties(),
 480        exp.Qualify: lambda self: self._parse_qualify(),
 481        exp.Returning: lambda self: self._parse_returning(),
 482        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 483        exp.Table: lambda self: self._parse_table_parts(),
 484        exp.TableAlias: lambda self: self._parse_table_alias(),
 485        exp.Where: lambda self: self._parse_where(),
 486        exp.Window: lambda self: self._parse_named_window(),
 487        exp.With: lambda self: self._parse_with(),
 488        "JOIN_TYPE": lambda self: self._parse_join_parts(),
 489    }
 490
 491    STATEMENT_PARSERS = {
 492        TokenType.ALTER: lambda self: self._parse_alter(),
 493        TokenType.BEGIN: lambda self: self._parse_transaction(),
 494        TokenType.CACHE: lambda self: self._parse_cache(),
 495        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 496        TokenType.COMMENT: lambda self: self._parse_comment(),
 497        TokenType.CREATE: lambda self: self._parse_create(),
 498        TokenType.DELETE: lambda self: self._parse_delete(),
 499        TokenType.DESC: lambda self: self._parse_describe(),
 500        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 501        TokenType.DROP: lambda self: self._parse_drop(),
 502        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 503        TokenType.FROM: lambda self: exp.select("*").from_(
 504            t.cast(exp.From, self._parse_from(skip_from_token=True))
 505        ),
 506        TokenType.INSERT: lambda self: self._parse_insert(),
 507        TokenType.LOAD: lambda self: self._parse_load(),
 508        TokenType.MERGE: lambda self: self._parse_merge(),
 509        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 510        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 511        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 512        TokenType.SET: lambda self: self._parse_set(),
 513        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 514        TokenType.UPDATE: lambda self: self._parse_update(),
 515        TokenType.USE: lambda self: self.expression(
 516            exp.Use,
 517            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 518            and exp.Var(this=self._prev.text),
 519            this=self._parse_table(schema=False),
 520        ),
 521    }
 522
 523    UNARY_PARSERS = {
 524        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 525        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 526        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 527        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 528    }
 529
 530    PRIMARY_PARSERS = {
 531        TokenType.STRING: lambda self, token: self.expression(
 532            exp.Literal, this=token.text, is_string=True
 533        ),
 534        TokenType.NUMBER: lambda self, token: self.expression(
 535            exp.Literal, this=token.text, is_string=False
 536        ),
 537        TokenType.STAR: lambda self, _: self.expression(
 538            exp.Star,
 539            **{"except": self._parse_except(), "replace": self._parse_replace()},
 540        ),
 541        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 542        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 543        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 544        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 545        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 546        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 547        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 548        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 549            exp.National, this=token.text
 550        ),
 551        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 552        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 553    }
 554
 555    PLACEHOLDER_PARSERS = {
 556        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 557        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 558        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 559        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 560        else None,
 561    }
 562
 563    RANGE_PARSERS = {
 564        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 565        TokenType.GLOB: binary_range_parser(exp.Glob),
 566        TokenType.ILIKE: binary_range_parser(exp.ILike),
 567        TokenType.IN: lambda self, this: self._parse_in(this),
 568        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 569        TokenType.IS: lambda self, this: self._parse_is(this),
 570        TokenType.LIKE: binary_range_parser(exp.Like),
 571        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 572        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 573        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 574    }
 575
 576    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 577        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 578        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 579        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 580        "CHARACTER SET": lambda self: self._parse_character_set(),
 581        "CHECKSUM": lambda self: self._parse_checksum(),
 582        "CLUSTER": lambda self: self._parse_cluster(),
 583        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 584        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 585        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 586        "DEFINER": lambda self: self._parse_definer(),
 587        "DETERMINISTIC": lambda self: self.expression(
 588            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 589        ),
 590        "DISTKEY": lambda self: self._parse_distkey(),
 591        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 592        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 593        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 594        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 595        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 596        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 597        "FREESPACE": lambda self: self._parse_freespace(),
 598        "IMMUTABLE": lambda self: self.expression(
 599            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 600        ),
 601        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 602        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 603        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
 604        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
 605        "LIKE": lambda self: self._parse_create_like(),
 606        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 607        "LOCK": lambda self: self._parse_locking(),
 608        "LOCKING": lambda self: self._parse_locking(),
 609        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 610        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 611        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 612        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 613        "NO": lambda self: self._parse_no_property(),
 614        "ON": lambda self: self._parse_on_property(),
 615        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 616        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 617        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 618        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 619        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
 620        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
 621        "RETURNS": lambda self: self._parse_returns(),
 622        "ROW": lambda self: self._parse_row(),
 623        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 624        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 625        "SETTINGS": lambda self: self.expression(
 626            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 627        ),
 628        "SORTKEY": lambda self: self._parse_sortkey(),
 629        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
 630        "STABLE": lambda self: self.expression(
 631            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 632        ),
 633        "STORED": lambda self: self._parse_stored(),
 634        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 635        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 636        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 637        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 638        "TTL": lambda self: self._parse_ttl(),
 639        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 640        "VOLATILE": lambda self: self._parse_volatile_property(),
 641        "WITH": lambda self: self._parse_with_property(),
 642    }
 643
 644    CONSTRAINT_PARSERS = {
 645        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 646        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 647        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 648        "CHARACTER SET": lambda self: self.expression(
 649            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 650        ),
 651        "CHECK": lambda self: self.expression(
 652            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 653        ),
 654        "COLLATE": lambda self: self.expression(
 655            exp.CollateColumnConstraint, this=self._parse_var()
 656        ),
 657        "COMMENT": lambda self: self.expression(
 658            exp.CommentColumnConstraint, this=self._parse_string()
 659        ),
 660        "COMPRESS": lambda self: self._parse_compress(),
 661        "DEFAULT": lambda self: self.expression(
 662            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 663        ),
 664        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 665        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 666        "FORMAT": lambda self: self.expression(
 667            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 668        ),
 669        "GENERATED": lambda self: self._parse_generated_as_identity(),
 670        "IDENTITY": lambda self: self._parse_auto_increment(),
 671        "INLINE": lambda self: self._parse_inline(),
 672        "LIKE": lambda self: self._parse_create_like(),
 673        "NOT": lambda self: self._parse_not_constraint(),
 674        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 675        "ON": lambda self: self._match(TokenType.UPDATE)
 676        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 677        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 678        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 679        "REFERENCES": lambda self: self._parse_references(match=False),
 680        "TITLE": lambda self: self.expression(
 681            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 682        ),
 683        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 684        "UNIQUE": lambda self: self._parse_unique(),
 685        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 686    }
 687
 688    ALTER_PARSERS = {
 689        "ADD": lambda self: self._parse_alter_table_add(),
 690        "ALTER": lambda self: self._parse_alter_table_alter(),
 691        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 692        "DROP": lambda self: self._parse_alter_table_drop(),
 693        "RENAME": lambda self: self._parse_alter_table_rename(),
 694    }
 695
 696    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 697
 698    NO_PAREN_FUNCTION_PARSERS = {
 699        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 700        TokenType.CASE: lambda self: self._parse_case(),
 701        TokenType.IF: lambda self: self._parse_if(),
 702        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 703            exp.NextValueFor,
 704            this=self._parse_column(),
 705            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 706        ),
 707    }
 708
 709    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 710
 711    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 712        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 713        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 714        "DECODE": lambda self: self._parse_decode(),
 715        "EXTRACT": lambda self: self._parse_extract(),
 716        "JSON_OBJECT": lambda self: self._parse_json_object(),
 717        "LOG": lambda self: self._parse_logarithm(),
 718        "MATCH": lambda self: self._parse_match_against(),
 719        "OPENJSON": lambda self: self._parse_open_json(),
 720        "POSITION": lambda self: self._parse_position(),
 721        "SAFE_CAST": lambda self: self._parse_cast(False),
 722        "STRING_AGG": lambda self: self._parse_string_agg(),
 723        "SUBSTRING": lambda self: self._parse_substring(),
 724        "TRIM": lambda self: self._parse_trim(),
 725        "TRY_CAST": lambda self: self._parse_cast(False),
 726        "TRY_CONVERT": lambda self: self._parse_convert(False),
 727    }
 728
    # Parsers for the optional clauses that can trail a query body; each
    # callable returns the parsed clause (or None / an empty list).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement sub-parsers, keyed by the keyword following SET.
    # The metaclass builds `_set_trie` from these keys.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement sub-parsers; empty here, populated by dialect subclasses.
    # The metaclass builds `_show_trie` from these keys.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-data-type literal parsers; empty here, populated by dialect subclasses.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that query modifiers may be attached to.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted transaction kinds.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Accepted SET TRANSACTION characteristics.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ...
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CREATE ... CLONE ( <kind> => ... ) spec.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Identifier tokens usable as a window alias (ROWS excluded).
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    # Tokens that may start an ADD ... constraint in ALTER statements.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior switches, overridden by subclasses.
    # TRY_CAST / SAFE_CAST parse with _parse_cast(False), i.e. non-strict.
    STRICT_CAST = True

    # Whether CONVERT takes the type as its first argument -- TODO confirm
    # against _parse_convert (defined outside this excerpt).
    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # LOG argument order / single-argument semantics; consumed by
    # _parse_logarithm (defined outside this excerpt).
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Restrict instances to exactly the attributes set in __init__/reset
    # plus the metaclass-provided tries.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 808
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Args:
            error_level: how parse errors are surfaced; defaults to
                ErrorLevel.IMMEDIATE, i.e. raise on the first error.
            error_message_context: number of characters of SQL shown on either
                side of the highlighted token in an error message.
            index_offset: dialect array-index base; stored for use by parsing
                routines outside this excerpt.
            unnest_column_only: dialect flag stored for use by parsing routines
                outside this excerpt.
            alias_post_tablesample: dialect flag stored for use by parsing
                routines outside this excerpt.
            max_errors: maximum number of error messages concatenated when a
                ParseError is raised by check_errors.
            null_ordering: dialect NULL-ordering default, stored for use by
                parsing routines outside this excerpt.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Initialize all mutable per-parse state.
        self.reset()
 827
 828    def reset(self):
 829        self.sql = ""
 830        self.errors = []
 831        self._tokens = []
 832        self._index = 0
 833        self._curr = None
 834        self._next = None
 835        self._prev = None
 836        self._prev_comments = None
 837
 838    def parse(
 839        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 840    ) -> t.List[t.Optional[exp.Expression]]:
 841        """
 842        Parses a list of tokens and returns a list of syntax trees, one tree
 843        per parsed SQL statement.
 844
 845        Args:
 846            raw_tokens: the list of tokens.
 847            sql: the original SQL string, used to produce helpful debug messages.
 848
 849        Returns:
 850            The list of syntax trees.
 851        """
 852        return self._parse(
 853            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 854        )
 855
 856    def parse_into(
 857        self,
 858        expression_types: exp.IntoType,
 859        raw_tokens: t.List[Token],
 860        sql: t.Optional[str] = None,
 861    ) -> t.List[t.Optional[exp.Expression]]:
 862        """
 863        Parses a list of tokens into a given Expression type. If a collection of Expression
 864        types is given instead, this method will try to parse the token list into each one
 865        of them, stopping at the first for which the parsing succeeds.
 866
 867        Args:
 868            expression_types: the expression type(s) to try and parse the token list into.
 869            raw_tokens: the list of tokens.
 870            sql: the original SQL string, used to produce helpful debug messages.
 871
 872        Returns:
 873            The target Expression.
 874        """
 875        errors = []
 876        for expression_type in ensure_collection(expression_types):
 877            parser = self.EXPRESSION_PARSERS.get(expression_type)
 878            if not parser:
 879                raise TypeError(f"No parser registered for {expression_type}")
 880            try:
 881                return self._parse(parser, raw_tokens, sql)
 882            except ParseError as e:
 883                e.errors[0]["into_expression"] = expression_type
 884                errors.append(e)
 885        raise ParseError(
 886            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 887            errors=merge_errors(errors),
 888        ) from errors[-1]
 889
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Split `raw_tokens` on semicolons and apply `parse_method` to each chunk.

        Args:
            parse_method: unbound parsing method run once per statement chunk.
            raw_tokens: the tokens to parse.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            One (possibly None) expression per statement chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split the stream into one chunk per statement. Semicolon tokens are
        # dropped; a trailing semicolon does not open an empty final chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # Position the cursor on the chunk's first token.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement did not fully parse.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 923
 924    def check_errors(self) -> None:
 925        """
 926        Logs or raises any found errors, depending on the chosen error level setting.
 927        """
 928        if self.error_level == ErrorLevel.WARN:
 929            for error in self.errors:
 930                logger.error(str(error))
 931        elif self.error_level == ErrorLevel.RAISE and self.errors:
 932            raise ParseError(
 933                concat_messages(self.errors, self.max_errors),
 934                errors=merge_errors(self.errors),
 935            )
 936
 937    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 938        """
 939        Appends an error in the list of recorded errors or raises it, depending on the chosen
 940        error level setting.
 941        """
 942        token = token or self._curr or self._prev or Token.string("")
 943        start = token.start
 944        end = token.end + 1
 945        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 946        highlight = self.sql[start:end]
 947        end_context = self.sql[end : end + self.error_message_context]
 948
 949        error = ParseError.new(
 950            f"{message}. Line {token.line}, Col: {token.col}.\n"
 951            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 952            description=message,
 953            line=token.line,
 954            col=token.col,
 955            start_context=start_context,
 956            highlight=highlight,
 957            end_context=end_context,
 958        )
 959
 960        if self.error_level == ErrorLevel.IMMEDIATE:
 961            raise error
 962
 963        self.errors.append(error)
 964
 965    def expression(
 966        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 967    ) -> E:
 968        """
 969        Creates a new, validated Expression.
 970
 971        Args:
 972            exp_class: the expression class to instantiate.
 973            comments: an optional list of comments to attach to the expression.
 974            kwargs: the arguments to set for the expression along with their respective values.
 975
 976        Returns:
 977            The target expression.
 978        """
 979        instance = exp_class(**kwargs)
 980        instance.add_comments(comments) if comments else self._add_comments(instance)
 981        self.validate_expression(instance)
 982        return instance
 983
 984    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 985        if expression and self._prev_comments:
 986            expression.add_comments(self._prev_comments)
 987            self._prev_comments = None
 988
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)
1005
1006    def _find_sql(self, start: Token, end: Token) -> str:
1007        return self.sql[start.start : end.end + 1]
1008
1009    def _advance(self, times: int = 1) -> None:
1010        self._index += times
1011        self._curr = seq_get(self._tokens, self._index)
1012        self._next = seq_get(self._tokens, self._index + 1)
1013        if self._index > 0:
1014            self._prev = self._tokens[self._index - 1]
1015            self._prev_comments = self._prev.comments
1016        else:
1017            self._prev = None
1018            self._prev_comments = None
1019
1020    def _retreat(self, index: int) -> None:
1021        if index != self._index:
1022            self._advance(index - self._index)
1023
1024    def _parse_command(self) -> exp.Command:
1025        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1026
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parse COMMENT [IF EXISTS] ON <kind> <target> IS <string>, falling back
        to a generic Command when the target kind is not a known creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        # Parse the target according to the kind of object being commented on.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1052
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression plus an optional action
            # (DELETE / RECOMPRESS <expr> / TO DISK <name> / TO VOLUME <name>).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <aggregations>
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1090
1091    def _parse_statement(self) -> t.Optional[exp.Expression]:
1092        if self._curr is None:
1093            return None
1094
1095        if self._match_set(self.STATEMENT_PARSERS):
1096            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1097
1098        if self._match_set(Tokenizer.COMMANDS):
1099            return self._parse_command()
1100
1101        expression = self._parse_expression()
1102        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1103        return self._parse_query_modifiers(expression)
1104
    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """
        Parse DROP [TEMPORARY] [MATERIALIZED] <kind> ... into exp.Drop, or fall
        back to a generic Command when the kind is not a known creatable.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        # NOTE: keyword-argument order matters -- each value consumes tokens,
        # so they must be evaluated in grammar order.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
1124
1125    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1126        return (
1127            self._match(TokenType.IF)
1128            and (not not_ or self._match(TokenType.NOT))
1129            and self._match(TokenType.EXISTS)
1130        )
1131
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parse CREATE [OR REPLACE] [UNIQUE] <kind> ... into exp.Create, falling
        back to a generic Command when the created kind can't be determined.
        """
        start = self._prev
        # Either the statement itself started with REPLACE, or CREATE is
        # followed by the OR REPLACE pair.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # TABLE immediately followed by FUNCTION: drop TABLE so that FUNCTION
        # becomes the create kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # Properties may appear between CREATE and the kind keyword.
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Expression]) -> None:
            # Merge newly parsed properties into the running collection.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS -- but only when the alias
            # isn't immediately followed by the defining query/parenthesis.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # Collect trailing indexes (comma-separated), interleaved with
                # more properties.
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            # CLONE <table> [AT|BEFORE ( <kind> => <expr> )]
            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
1249
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords that may precede the property
        # name; only the truthy ones are forwarded to the property parser.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The selected parser does not accept one of the modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1274
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property, returning None when none is present."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property: key may be an identifier or string.
        # Peek (advance=False) so nothing is consumed unless it really is one.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1298
1299    def _parse_stored(self) -> exp.Expression:
1300        self._match(TokenType.ALIAS)
1301
1302        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1303        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1304
1305        return self.expression(
1306            exp.FileFormatProperty,
1307            this=self.expression(
1308                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1309            )
1310            if input_format or output_format
1311            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1312        )
1313
1314    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1315        self._match(TokenType.EQ)
1316        self._match(TokenType.ALIAS)
1317        return self.expression(exp_class, this=self._parse_field())
1318
1319    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1320        properties = []
1321
1322        while True:
1323            if before:
1324                prop = self._parse_property_before()
1325            else:
1326                prop = self._parse_property()
1327
1328            if not prop:
1329                break
1330            for p in ensure_list(prop):
1331                properties.append(p)
1332
1333        if properties:
1334            return self.expression(exp.Properties, expressions=properties)
1335
1336        return None
1337
1338    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1339        return self.expression(
1340            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1341        )
1342
1343    def _parse_volatile_property(self) -> exp.Expression:
1344        if self._index >= 2:
1345            pre_volatile_token = self._tokens[self._index - 2]
1346        else:
1347            pre_volatile_token = None
1348
1349        if pre_volatile_token and pre_volatile_token.token_type in (
1350            TokenType.CREATE,
1351            TokenType.REPLACE,
1352            TokenType.UNIQUE,
1353        ):
1354            return exp.VolatileProperty()
1355
1356        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1357
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property (or parenthesized property list) after WITH."""
        self._match(TokenType.WITH)
        # WITH ( prop [, ...] ): peek (advance=False) and delegate.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        # Nothing left to read after WITH.
        if not self._next:
            return None

        return self._parse_withisolatedloading()
1377
1378    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1379    def _parse_definer(self) -> t.Optional[exp.Expression]:
1380        self._match(TokenType.EQ)
1381
1382        user = self._parse_id_var()
1383        self._match(TokenType.PARAMETER)
1384        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1385
1386        if not user or not host:
1387            return None
1388
1389        return exp.DefinerProperty(this=f"{user}@{host}")
1390
1391    def _parse_withjournaltable(self) -> exp.Expression:
1392        self._match(TokenType.TABLE)
1393        self._match(TokenType.EQ)
1394        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1395
1396    def _parse_log(self, no: bool = False) -> exp.Expression:
1397        return self.expression(exp.LogProperty, no=no)
1398
1399    def _parse_journal(self, **kwargs) -> exp.Expression:
1400        return self.expression(exp.JournalProperty, **kwargs)
1401
1402    def _parse_checksum(self) -> exp.Expression:
1403        self._match(TokenType.EQ)
1404
1405        on = None
1406        if self._match(TokenType.ON):
1407            on = True
1408        elif self._match_text_seq("OFF"):
1409            on = False
1410        default = self._match(TokenType.DEFAULT)
1411
1412        return self.expression(
1413            exp.ChecksumProperty,
1414            on=on,
1415            default=default,
1416        )
1417
1418    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1419        if not self._match_text_seq("BY"):
1420            self._retreat(self._index - 1)
1421            return None
1422        return self.expression(
1423            exp.Cluster,
1424            expressions=self._parse_csv(self._parse_ordered),
1425        )
1426
1427    def _parse_freespace(self) -> exp.Expression:
1428        self._match(TokenType.EQ)
1429        return self.expression(
1430            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1431        )
1432
1433    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1434        if self._match(TokenType.EQ):
1435            return self.expression(
1436                exp.MergeBlockRatioProperty,
1437                this=self._parse_number(),
1438                percent=self._match(TokenType.PERCENT),
1439            )
1440        return self.expression(
1441            exp.MergeBlockRatioProperty,
1442            no=no,
1443            default=default,
1444        )
1445
1446    def _parse_datablocksize(
1447        self,
1448        default: t.Optional[bool] = None,
1449        minimum: t.Optional[bool] = None,
1450        maximum: t.Optional[bool] = None,
1451    ) -> exp.Expression:
1452        self._match(TokenType.EQ)
1453        size = self._parse_number()
1454        units = None
1455        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1456            units = self._prev.text
1457        return self.expression(
1458            exp.DataBlocksizeProperty,
1459            size=size,
1460            units=units,
1461            default=default,
1462            minimum=minimum,
1463            maximum=maximum,
1464        )
1465
1466    def _parse_blockcompression(self) -> exp.Expression:
1467        self._match(TokenType.EQ)
1468        always = self._match_text_seq("ALWAYS")
1469        manual = self._match_text_seq("MANUAL")
1470        never = self._match_text_seq("NEVER")
1471        default = self._match_text_seq("DEFAULT")
1472        autotemp = None
1473        if self._match_text_seq("AUTOTEMP"):
1474            autotemp = self._parse_schema()
1475
1476        return self.expression(
1477            exp.BlockCompressionProperty,
1478            always=always,
1479            manual=manual,
1480            never=never,
1481            default=default,
1482            autotemp=autotemp,
1483        )
1484
1485    def _parse_withisolatedloading(self) -> exp.Expression:
1486        no = self._match_text_seq("NO")
1487        concurrent = self._match_text_seq("CONCURRENT")
1488        self._match_text_seq("ISOLATED", "LOADING")
1489        for_all = self._match_text_seq("FOR", "ALL")
1490        for_insert = self._match_text_seq("FOR", "INSERT")
1491        for_none = self._match_text_seq("FOR", "NONE")
1492        return self.expression(
1493            exp.IsolatedLoadingProperty,
1494            no=no,
1495            concurrent=concurrent,
1496            for_all=for_all,
1497            for_insert=for_insert,
1498            for_none=for_none,
1499        )
1500
    def _parse_locking(self) -> exp.Expression:
        """
        Parse a LOCKING clause: object kind, optional target, FOR/IN,
        lock type, and an optional OVERRIDE.
        """
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named object kinds are followed by a target name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1550
1551    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1552        if self._match(TokenType.PARTITION_BY):
1553            return self._parse_csv(self._parse_conjunction)
1554        return []
1555
1556    def _parse_partitioned_by(self) -> exp.Expression:
1557        self._match(TokenType.EQ)
1558        return self.expression(
1559            exp.PartitionedByProperty,
1560            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1561        )
1562
1563    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1564        if self._match_text_seq("AND", "STATISTICS"):
1565            statistics = True
1566        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1567            statistics = False
1568        else:
1569            statistics = None
1570
1571        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1572
1573    def _parse_no_property(self) -> t.Optional[exp.Property]:
1574        if self._match_text_seq("PRIMARY", "INDEX"):
1575            return exp.NoPrimaryIndexProperty()
1576        return None
1577
1578    def _parse_on_property(self) -> t.Optional[exp.Property]:
1579        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1580            return exp.OnCommitProperty()
1581        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1582            return exp.OnCommitProperty(delete=True)
1583        return None
1584
1585    def _parse_distkey(self) -> exp.Expression:
1586        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1587
1588    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1589        table = self._parse_table(schema=True)
1590        options = []
1591        while self._match_texts(("INCLUDING", "EXCLUDING")):
1592            this = self._prev.text.upper()
1593            id_var = self._parse_id_var()
1594
1595            if not id_var:
1596                return None
1597
1598            options.append(
1599                self.expression(
1600                    exp.Property,
1601                    this=this,
1602                    value=exp.Var(this=id_var.this.upper()),
1603                )
1604            )
1605        return self.expression(exp.LikeProperty, this=table, expressions=options)
1606
1607    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1608        return self.expression(
1609            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1610        )
1611
1612    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1613        self._match(TokenType.EQ)
1614        return self.expression(
1615            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1616        )
1617
1618    def _parse_returns(self) -> exp.Expression:
1619        value: t.Optional[exp.Expression]
1620        is_table = self._match(TokenType.TABLE)
1621
1622        if is_table:
1623            if self._match(TokenType.LT):
1624                value = self.expression(
1625                    exp.Schema,
1626                    this="TABLE",
1627                    expressions=self._parse_csv(self._parse_struct_types),
1628                )
1629                if not self._match(TokenType.GT):
1630                    self.raise_error("Expecting >")
1631            else:
1632                value = self._parse_schema(exp.Var(this="TABLE"))
1633        else:
1634            value = self._parse_types()
1635
1636        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1637
1638    def _parse_describe(self) -> exp.Expression:
1639        kind = self._match_set(self.CREATABLES) and self._prev.text
1640        this = self._parse_table()
1641
1642        return self.expression(exp.Describe, this=this, kind=kind)
1643
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (including INSERT OVERWRITE [LOCAL] DIRECTORY).

        Note: the keyword arguments below are evaluated left to right, so the
        parse order of exists/partition/select/conflict/returning is significant.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive-style: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE — keep the matched keyword text.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            # INTO and TABLE are both optional noise words here.
            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1675
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT (postgres/sqlite style) or ON DUPLICATE KEY (mysql style)."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = 1, ... (UPDATE/SET tokens are optional noise
            # for the mysql ON DUPLICATE KEY variant).
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1709
1710    def _parse_returning(self) -> t.Optional[exp.Expression]:
1711        if not self._match(TokenType.RETURNING):
1712            return None
1713
1714        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1715
1716    def _parse_row(self) -> t.Optional[exp.Expression]:
1717        if not self._match(TokenType.FORMAT):
1718            return None
1719        return self._parse_row_format()
1720
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse Hive ROW FORMAT SERDE / ROW FORMAT DELIMITED clauses.

        When `match_row` is True the leading ROW FORMAT tokens are required;
        otherwise the caller is assumed to have consumed them already.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional and tried in the fixed Hive grammar order;
        # only the matched ones become args on the resulting property.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1746
    def _parse_load(self) -> exp.Expression:
        """Parse Hive LOAD DATA ...; any other LOAD falls back to a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # Keyword args evaluate left to right, so the table is parsed
            # before PARTITION / INPUTFORMAT / SERDE.
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1766
    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement (the DELETE token is already consumed)."""
        # FROM is optional noise: DELETE [FROM] <table>.
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            # Postgres-style USING <tables> join targets.
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )
1777
    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement (the UPDATE token is already consumed)."""
        return self.expression(
            exp.Update,
            # "from" is a Python keyword, so the args are passed via a dict.
            # Values are evaluated in order: table, SET list, FROM, WHERE, RETURNING.
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )
1789
1790    def _parse_uncache(self) -> exp.Expression:
1791        if not self._match(TokenType.TABLE):
1792            self.raise_error("Expecting TABLE after UNCACHE")
1793
1794        return self.expression(
1795            exp.Uncache,
1796            exists=self._parse_exists(),
1797            this=self._parse_table(schema=True),
1798        )
1799
1800    def _parse_cache(self) -> exp.Expression:
1801        lazy = self._match_text_seq("LAZY")
1802        self._match(TokenType.TABLE)
1803        table = self._parse_table(schema=True)
1804        options = []
1805
1806        if self._match_text_seq("OPTIONS"):
1807            self._match_l_paren()
1808            k = self._parse_string()
1809            self._match(TokenType.EQ)
1810            v = self._parse_string()
1811            options = [k, v]
1812            self._match_r_paren()
1813
1814        self._match(TokenType.ALIAS)
1815        return self.expression(
1816            exp.Cache,
1817            this=table,
1818            lazy=lazy,
1819            options=options,
1820            expression=self._parse_select(nested=True),
1821        )
1822
1823    def _parse_partition(self) -> t.Optional[exp.Expression]:
1824        if not self._match(TokenType.PARTITION):
1825            return None
1826
1827        return self.expression(
1828            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1829        )
1830
1831    def _parse_value(self) -> exp.Expression:
1832        if self._match(TokenType.L_PAREN):
1833            expressions = self._parse_csv(self._parse_conjunction)
1834            self._match_r_paren()
1835            return self.expression(exp.Tuple, expressions=expressions)
1836
1837        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1838        # Source: https://prestodb.io/docs/current/sql/values.html
1839        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1840
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH/CTE, SELECT, parenthesized select, or VALUES.

        Args:
            nested: whether a parenthesized nested select may be parsed here.
            table: whether a table expression is acceptable inside the parens.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns None if nothing select-like starts at the current token.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # DuckDB-style FROM-first syntax: (FROM tbl).
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1928
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop unless the list continues. Note the short-circuit: if COMMA
            # matches, the WITH in the condition is not attempted; the redundant
            # WITH after a comma is consumed in the else branch instead.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1948
1949    def _parse_cte(self) -> exp.Expression:
1950        alias = self._parse_table_alias()
1951        if not alias or not alias.this:
1952            self.raise_error("Expected CTE to have alias")
1953
1954        self._match(TokenType.ALIAS)
1955
1956        return self.expression(
1957            exp.CTE,
1958            this=self._parse_wrapped(self._parse_statement),
1959            alias=alias,
1960        )
1961
1962    def _parse_table_alias(
1963        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1964    ) -> t.Optional[exp.Expression]:
1965        any_token = self._match(TokenType.ALIAS)
1966        alias = (
1967            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1968            or self._parse_string_as_identifier()
1969        )
1970
1971        index = self._index
1972        if self._match(TokenType.L_PAREN):
1973            columns = self._parse_csv(self._parse_function_parameter)
1974            self._match_r_paren() if columns else self._retreat(index)
1975        else:
1976            columns = None
1977
1978        if not alias and not columns:
1979            return None
1980
1981        return self.expression(exp.TableAlias, this=alias, columns=columns)
1982
1983    def _parse_subquery(
1984        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1985    ) -> t.Optional[exp.Expression]:
1986        if not this:
1987            return None
1988        return self.expression(
1989            exp.Subquery,
1990            this=this,
1991            pivots=self._parse_pivots(),
1992            alias=self._parse_table_alias() if parse_alias else None,
1993        )
1994
1995    def _parse_query_modifiers(
1996        self, this: t.Optional[exp.Expression]
1997    ) -> t.Optional[exp.Expression]:
1998        if isinstance(this, self.MODIFIABLES):
1999            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2000                expression = parser(self)
2001
2002                if expression:
2003                    this.set(key, expression)
2004        return this
2005
2006    def _parse_hint(self) -> t.Optional[exp.Expression]:
2007        if self._match(TokenType.HINT):
2008            hints = self._parse_csv(self._parse_function)
2009            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2010                self.raise_error("Expected */ after HINT")
2011            return self.expression(exp.Hint, expressions=hints)
2012
2013        return None
2014
2015    def _parse_into(self) -> t.Optional[exp.Expression]:
2016        if not self._match(TokenType.INTO):
2017            return None
2018
2019        temp = self._match(TokenType.TEMPORARY)
2020        unlogged = self._match_text_seq("UNLOGGED")
2021        self._match(TokenType.TABLE)
2022
2023        return self.expression(
2024            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2025        )
2026
2027    def _parse_from(
2028        self, modifiers: bool = False, skip_from_token: bool = False
2029    ) -> t.Optional[exp.From]:
2030        if not skip_from_token and not self._match(TokenType.FROM):
2031            return None
2032
2033        comments = self._prev_comments
2034        this = self._parse_table()
2035
2036        return self.expression(
2037            exp.From,
2038            comments=comments,
2039            this=self._parse_query_modifiers(this) if modifiers else this,
2040        )
2041
2042    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2043        if not self._match(TokenType.MATCH_RECOGNIZE):
2044            return None
2045
2046        self._match_l_paren()
2047
2048        partition = self._parse_partition_by()
2049        order = self._parse_order()
2050        measures = (
2051            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2052        )
2053
2054        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2055            rows = exp.Var(this="ONE ROW PER MATCH")
2056        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2057            text = "ALL ROWS PER MATCH"
2058            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2059                text += f" SHOW EMPTY MATCHES"
2060            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2061                text += f" OMIT EMPTY MATCHES"
2062            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2063                text += f" WITH UNMATCHED ROWS"
2064            rows = exp.Var(this=text)
2065        else:
2066            rows = None
2067
2068        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2069            text = "AFTER MATCH SKIP"
2070            if self._match_text_seq("PAST", "LAST", "ROW"):
2071                text += f" PAST LAST ROW"
2072            elif self._match_text_seq("TO", "NEXT", "ROW"):
2073                text += f" TO NEXT ROW"
2074            elif self._match_text_seq("TO", "FIRST"):
2075                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2076            elif self._match_text_seq("TO", "LAST"):
2077                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2078            after = exp.Var(this=text)
2079        else:
2080            after = None
2081
2082        if self._match_text_seq("PATTERN"):
2083            self._match_l_paren()
2084
2085            if not self._curr:
2086                self.raise_error("Expecting )", self._curr)
2087
2088            paren = 1
2089            start = self._curr
2090
2091            while self._curr and paren > 0:
2092                if self._curr.token_type == TokenType.L_PAREN:
2093                    paren += 1
2094                if self._curr.token_type == TokenType.R_PAREN:
2095                    paren -= 1
2096                end = self._prev
2097                self._advance()
2098            if paren > 0:
2099                self.raise_error("Expecting )", self._curr)
2100            pattern = exp.Var(this=self._find_sql(start, end))
2101        else:
2102            pattern = None
2103
2104        define = (
2105            self._parse_csv(
2106                lambda: self.expression(
2107                    exp.Alias,
2108                    alias=self._parse_id_var(any_token=True),
2109                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2110                )
2111            )
2112            if self._match_text_seq("DEFINE")
2113            else None
2114        )
2115
2116        self._match_r_paren()
2117
2118        return self.expression(
2119            exp.MatchRecognize,
2120            partition_by=partition,
2121            order=order,
2122            measures=measures,
2123            rows=rows,
2124            after=after,
2125            pattern=pattern,
2126            define=define,
2127            alias=self._parse_table_alias(),
2128        )
2129
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY keeps unmatched rows, like an outer join.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery followed: parse a (possibly dotted) function call
            # or identifier instead, e.g. LATERAL tbl.func(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # Hive LATERAL VIEW: table alias followed by AS <column list>.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2171
2172    def _parse_join_parts(
2173        self,
2174    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2175        return (
2176            self._match_set(self.JOIN_METHODS) and self._prev,
2177            self._match_set(self.JOIN_SIDES) and self._prev,
2178            self._match_set(self.JOIN_KINDS) and self._prev,
2179        )
2180
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause; returns None when no join syntax is present."""
        if self._match(TokenType.COMMA):
            # Implicit cross join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The tokens weren't actually a join prefix — rewind and discard.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY generates like a LEFT join downstream.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)
2222
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an index definition.

        When `index` is given the name is already known and only the
        ON <table> target is parsed; otherwise the
        [UNIQUE|PRIMARY|AMP] INDEX <name> prefix is parsed first.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")
            if not self._match(TokenType.INDEX):
                return None
            index = self._parse_id_var()
            table = None

        # Peek (advance=False) so the wrapped-csv parser consumes the paren itself.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )
2259
2260    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2261        return (
2262            (not schema and self._parse_function())
2263            or self._parse_id_var(any_token=False)
2264            or self._parse_string_as_identifier()
2265            or self._parse_placeholder()
2266        )
2267
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a dotted table name into catalog.db.table components."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift the parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2290
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral, unnest, values, subquery, or a plain table.

        Args:
            schema: whether to parse the table as a schema definition (column list).
            alias_tokens: token types allowed to serve as the table alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias, others after it.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # T-SQL style table hints: WITH (NOLOCK, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table it samples from.
            table_sample.set("this", this)
            this = table_sample

        return this
2342
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table expression with optional ordinality, alias and offset."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects like BigQuery alias the produced column, not the table;
            # move the parsed name into the columns slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2369
2370    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2371        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2372        if not is_derived and not self._match(TokenType.VALUES):
2373            return None
2374
2375        expressions = self._parse_csv(self._parse_value)
2376
2377        if is_derived:
2378            self._match_r_paren()
2379
2380        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2381
2382    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2383        if not self._match(TokenType.TABLE_SAMPLE) and not (
2384            as_modifier and self._match_text_seq("USING", "SAMPLE")
2385        ):
2386            return None
2387
2388        bucket_numerator = None
2389        bucket_denominator = None
2390        bucket_field = None
2391        percent = None
2392        rows = None
2393        size = None
2394        seed = None
2395
2396        kind = (
2397            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2398        )
2399        method = self._parse_var(tokens=(TokenType.ROW,))
2400
2401        self._match(TokenType.L_PAREN)
2402
2403        num = self._parse_number()
2404
2405        if self._match_text_seq("BUCKET"):
2406            bucket_numerator = self._parse_number()
2407            self._match_text_seq("OUT", "OF")
2408            bucket_denominator = bucket_denominator = self._parse_number()
2409            self._match(TokenType.ON)
2410            bucket_field = self._parse_field()
2411        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2412            percent = num
2413        elif self._match(TokenType.ROWS):
2414            rows = num
2415        else:
2416            size = num
2417
2418        self._match(TokenType.R_PAREN)
2419
2420        if self._match(TokenType.L_PAREN):
2421            method = self._parse_var()
2422            seed = self._match(TokenType.COMMA) and self._parse_number()
2423            self._match_r_paren()
2424        elif self._match_texts(("SEED", "REPEATABLE")):
2425            seed = self._parse_wrapped(self._parse_number)
2426
2427        return self.expression(
2428            exp.TableSample,
2429            method=method,
2430            bucket_numerator=bucket_numerator,
2431            bucket_denominator=bucket_denominator,
2432            bucket_field=bucket_field,
2433            percent=percent,
2434            rows=rows,
2435            size=size,
2436            seed=seed,
2437            kind=kind,
2438        )
2439
2440    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2441        return list(iter(self._parse_pivot, None))
2442
2443    # https://duckdb.org/docs/sql/statements/pivot
2444    def _parse_simplified_pivot(self) -> exp.Pivot:
2445        def _parse_on() -> t.Optional[exp.Expression]:
2446            this = self._parse_bitwise()
2447            return self._parse_in(this) if self._match(TokenType.IN) else this
2448
2449        this = self._parse_table()
2450        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2451        using = self._match(TokenType.USING) and self._parse_csv(
2452            lambda: self._parse_alias(self._parse_function())
2453        )
2454        group = self._parse_group()
2455        return self.expression(
2456            exp.Pivot, this=this, expressions=expressions, using=using, group=group
2457        )
2458
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT or UNPIVOT clause attached to a table expression.

        Returns None (after rewinding the token stream) when the keyword is not
        followed by a parenthesized body, so that it can be re-parsed as, e.g.,
        an alias. Raises on a malformed body (missing aggregations/FOR/IN).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT was consumed but no body follows: backtrack.
            self._retreat(index)
            return None

        if unpivot:
            # UNPIVOT lists plain columns; PIVOT lists (aliased) aggregations.
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias if another PIVOT/UNPIVOT doesn't follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute output column names: one per (IN value, aggregation) pair,
            # joined with '_' in dialect-dependent order (PREFIXED_PIVOT_COLUMNS).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2518
2519    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2520        return [agg.alias for agg in aggregations]
2521
2522    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2523        if not skip_where_token and not self._match(TokenType.WHERE):
2524            return None
2525
2526        return self.expression(
2527            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2528        )
2529
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, [WITH] ROLLUP,
        [WITH] CUBE and WITH TOTALS; `skip_group_by_token` assumes GROUP BY
        was already consumed.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each kind of grouping element across loop passes.
        elements = defaultdict(list)

        # Loop because plain expressions and the special forms can be
        # interleaved; stop once a pass matches none of the special forms.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE take no argument list (rollup/cube is
            # then just True); bare ROLLUP(...) / CUBE(...) carry columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2566
2567    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2568        if not self._match(TokenType.GROUPING_SETS):
2569            return None
2570
2571        return self._parse_wrapped_csv(self._parse_grouping_set)
2572
2573    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2574        if self._match(TokenType.L_PAREN):
2575            grouping_set = self._parse_csv(self._parse_column)
2576            self._match_r_paren()
2577            return self.expression(exp.Tuple, expressions=grouping_set)
2578
2579        return self._parse_column()
2580
2581    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2582        if not skip_having_token and not self._match(TokenType.HAVING):
2583            return None
2584        return self.expression(exp.Having, this=self._parse_conjunction())
2585
2586    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2587        if not self._match(TokenType.QUALIFY):
2588            return None
2589        return self.expression(exp.Qualify, this=self._parse_conjunction())
2590
2591    def _parse_order(
2592        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2593    ) -> t.Optional[exp.Expression]:
2594        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2595            return this
2596
2597        return self.expression(
2598            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2599        )
2600
2601    def _parse_sort(
2602        self, exp_class: t.Type[exp.Expression], *texts: str
2603    ) -> t.Optional[exp.Expression]:
2604        if not self._match_text_seq(*texts):
2605            return None
2606        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2607
2608    def _parse_ordered(self) -> exp.Expression:
2609        this = self._parse_conjunction()
2610        self._match(TokenType.ASC)
2611        is_desc = self._match(TokenType.DESC)
2612        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2613        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2614        desc = is_desc or False
2615        asc = not desc
2616        nulls_first = is_nulls_first or False
2617        explicitly_null_ordered = is_nulls_first or is_nulls_last
2618        if (
2619            not explicitly_null_ordered
2620            and (
2621                (asc and self.null_ordering == "nulls_are_small")
2622                or (desc and self.null_ordering != "nulls_are_small")
2623            )
2624            and self.null_ordering != "nulls_are_last"
2625        ):
2626            nulls_first = True
2627
2628        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2629
2630    def _parse_limit(
2631        self, this: t.Optional[exp.Expression] = None, top: bool = False
2632    ) -> t.Optional[exp.Expression]:
2633        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2634            limit_paren = self._match(TokenType.L_PAREN)
2635            limit_exp = self.expression(
2636                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2637            )
2638
2639            if limit_paren:
2640                self._match_r_paren()
2641
2642            return limit_exp
2643
2644        if self._match(TokenType.FETCH):
2645            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2646            direction = self._prev.text if direction else "FIRST"
2647
2648            count = self._parse_number()
2649            percent = self._match(TokenType.PERCENT)
2650
2651            self._match_set((TokenType.ROW, TokenType.ROWS))
2652
2653            only = self._match_text_seq("ONLY")
2654            with_ties = self._match_text_seq("WITH", "TIES")
2655
2656            if only and with_ties:
2657                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2658
2659            return self.expression(
2660                exp.Fetch,
2661                direction=direction,
2662                count=count,
2663                percent=percent,
2664                with_ties=with_ties,
2665            )
2666
2667        return this
2668
2669    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2670        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2671            return this
2672
2673        count = self._parse_number()
2674        self._match_set((TokenType.ROW, TokenType.ROWS))
2675        return self.expression(exp.Offset, this=this, expression=count)
2676
2677    def _parse_locks(self) -> t.List[exp.Expression]:
2678        # Lists are invariant, so we need to use a type hint here
2679        locks: t.List[exp.Expression] = []
2680
2681        while True:
2682            if self._match_text_seq("FOR", "UPDATE"):
2683                update = True
2684            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2685                "LOCK", "IN", "SHARE", "MODE"
2686            ):
2687                update = False
2688            else:
2689                break
2690
2691            expressions = None
2692            if self._match_text_seq("OF"):
2693                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2694
2695            wait: t.Optional[bool | exp.Expression] = None
2696            if self._match_text_seq("NOWAIT"):
2697                wait = True
2698            elif self._match_text_seq("WAIT"):
2699                wait = self._parse_primary()
2700            elif self._match_text_seq("SKIP", "LOCKED"):
2701                wait = False
2702
2703            locks.append(
2704                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2705            )
2706
2707        return locks
2708
2709    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2710        if not self._match_set(self.SET_OPERATIONS):
2711            return this
2712
2713        token_type = self._prev.token_type
2714
2715        if token_type == TokenType.UNION:
2716            expression = exp.Union
2717        elif token_type == TokenType.EXCEPT:
2718            expression = exp.Except
2719        else:
2720            expression = exp.Intersect
2721
2722        return self.expression(
2723            expression,
2724            this=this,
2725            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2726            expression=self._parse_set_operations(self._parse_select(nested=True)),
2727        )
2728
2729    def _parse_expression(self) -> t.Optional[exp.Expression]:
2730        return self._parse_alias(self._parse_conjunction())
2731
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators (self.CONJUNCTION) over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2734
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (self.EQUALITY) over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2737
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (self.COMPARISON) over ranges."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2740
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates that may follow a bitwise expression:
        the dialect's RANGE_PARSERS (BETWEEN, IN, LIKE, ...), ISNULL/NOTNULL,
        and IS, handling an optional leading NOT.
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser backed out; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL becomes NOT (expr IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2767
2768    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2769        index = self._index - 1
2770        negate = self._match(TokenType.NOT)
2771        if self._match_text_seq("DISTINCT", "FROM"):
2772            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2773            return self.expression(klass, this=this, expression=self._parse_expression())
2774
2775        expression = self._parse_null() or self._parse_boolean()
2776        if not expression:
2777            self._retreat(index)
2778            return None
2779
2780        this = self.expression(exp.Is, this=this, expression=expression)
2781        return self.expression(exp.Not, this=this) if negate else this
2782
2783    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
2784        unnest = self._parse_unnest()
2785        if unnest:
2786            this = self.expression(exp.In, this=this, unnest=unnest)
2787        elif self._match(TokenType.L_PAREN):
2788            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2789
2790            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2791                this = self.expression(exp.In, this=this, query=expressions[0])
2792            else:
2793                this = self.expression(exp.In, this=this, expressions=expressions)
2794
2795            self._match_r_paren(this)
2796        else:
2797            this = self.expression(exp.In, this=this, field=self._parse_field())
2798
2799        return this
2800
2801    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2802        low = self._parse_bitwise()
2803        self._match(TokenType.AND)
2804        high = self._parse_bitwise()
2805        return self.expression(exp.Between, this=this, low=low, high=high)
2806
2807    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2808        if not self._match(TokenType.ESCAPE):
2809            return this
2810        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2811
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing it toward the canonical
        `INTERVAL '<value>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split a combined literal like '5 day' into value + unit.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
2836
2837    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2838        this = self._parse_term()
2839
2840        while True:
2841            if self._match_set(self.BITWISE):
2842                this = self.expression(
2843                    self.BITWISE[self._prev.token_type],
2844                    this=this,
2845                    expression=self._parse_term(),
2846                )
2847            elif self._match_pair(TokenType.LT, TokenType.LT):
2848                this = self.expression(
2849                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2850                )
2851            elif self._match_pair(TokenType.GT, TokenType.GT):
2852                this = self.expression(
2853                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2854                )
2855            else:
2856                break
2857
2858        return this
2859
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse term-level operators (self.TERM) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2862
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse factor-level operators (self.FACTOR) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2865
2866    def _parse_unary(self) -> t.Optional[exp.Expression]:
2867        if self._match_set(self.UNARY_PARSERS):
2868            return self.UNARY_PARSERS[self._prev.token_type](self)
2869        return self._parse_at_time_zone(self._parse_type())
2870
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: an INTERVAL, a typed
        literal such as DATE '2020-01-01' (parsed as a cast), or otherwise a
        plain column expression.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # <type> '<literal>': dialect-specific literal parser or a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: it was actually a
                # column reference, so rewind and re-parse it as one.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
2892
2893    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2894        this = self._parse_type()
2895        if not this:
2896            return None
2897
2898        return self.expression(
2899            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2900        )
2901
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning an exp.DataType (or a pseudo-type /
        interval node).

        `check_func` makes ambiguous TYPE(...) spellings rewind unless a string
        literal follows (so they can be re-parsed as function calls); `schema`
        is threaded through to nested type parsing. Returns None, after
        rewinding, when no type can be parsed here.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        # Parenthesized parameters: STRUCT(...), nested types, or sizes.
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all; rewind completely.
                self._retreat(index)
                return None

            # TYPE(...) might still be a function call, e.g. DATE(...).
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types: ARRAY<INT>, STRUCT<a INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing value list, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows, so this can't be a typed literal;
                # rewind entirely so the caller can re-parse (e.g. as a call).
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
3021
3022    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3023        this = self._parse_type() or self._parse_id_var()
3024        self._match(TokenType.COLON)
3025        return self._parse_column_def(this)
3026
3027    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3028        if not self._match_text_seq("AT", "TIME", "ZONE"):
3029            return this
3030        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3031
3032    def _parse_column(self) -> t.Optional[exp.Expression]:
3033        this = self._parse_field()
3034        if isinstance(this, exp.Identifier):
3035            this = self.expression(exp.Column, this=this)
3036        elif not this:
3037            return self._parse_bracket(this)
3038        return self._parse_column_ops(this)
3039
    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Apply postfix column operators to `this`: bracket subscripts,
        `::` casts, dialect-specific column operators, and dot-chained fields.
        """
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a literal operand: consume the next token and
                # wrap it as a number or string literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift each qualifier up one level: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
3082
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with adjacent-string
        concatenation), a leading-dot number, or a parenthesized expression,
        tuple, or subquery. Returns None when nothing matches.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers with no integer part, e.g. `.25` -> 0.25.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3126
3127    def _parse_field(
3128        self,
3129        any_token: bool = False,
3130        tokens: t.Optional[t.Collection[TokenType]] = None,
3131        anonymous_func: bool = False,
3132    ) -> t.Optional[exp.Expression]:
3133        return (
3134            self._parse_primary()
3135            or self._parse_function(anonymous=anonymous_func)
3136            or self._parse_id_var(any_token=any_token, tokens=tokens)
3137        )
3138
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Dispatch order: no-paren function parsers, no-paren builtin functions,
        dialect FUNCTION_PARSERS, subquery predicates, then known functions
        from `functions` (defaults to self.FUNCTIONS). `anonymous` forces an
        exp.Anonymous node even for known names. Returns None when the current
        tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3192
3193    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3194        return self._parse_column_def(self._parse_id_var())
3195
3196    def _parse_user_defined_function(
3197        self, kind: t.Optional[TokenType] = None
3198    ) -> t.Optional[exp.Expression]:
3199        this = self._parse_id_var()
3200
3201        while self._match(TokenType.DOT):
3202            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3203
3204        if not self._match(TokenType.L_PAREN):
3205            return this
3206
3207        expressions = self._parse_csv(self._parse_function_parameter)
3208        self._match_r_paren()
3209        return self.expression(
3210            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3211        )
3212
3213    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3214        literal = self._parse_primary()
3215        if literal:
3216            return self.expression(exp.Introducer, this=token.text, expression=literal)
3217
3218        return self.expression(exp.Identifier, this=token.text)
3219
3220    def _parse_session_parameter(self) -> exp.Expression:
3221        kind = None
3222        this = self._parse_id_var() or self._parse_primary()
3223
3224        if this and self._match(TokenType.DOT):
3225            kind = this.name
3226            this = self._parse_var() or self._parse_primary()
3227
3228        return self.expression(exp.SessionParameter, this=this, kind=kind)
3229
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda argument (e.g. x -> x + 1) or fall back to a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)  # not a parenthesized parameter list after all
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found: rewind and parse an ordinary argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # The LHS of a "name = value" argument is a variable, not a column.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3261
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # A "(" here may open a nested SELECT rather than a schema; probe for that first.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # Whether or not the probe succeeded, undo it and drop any errors it produced.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3284
3285    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3286        # column defs are not really columns, they're identifiers
3287        if isinstance(this, exp.Column):
3288            this = this.this
3289        kind = self._parse_types(schema=True)
3290
3291        if self._match_text_seq("FOR", "ORDINALITY"):
3292            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3293
3294        constraints = []
3295        while True:
3296            constraint = self._parse_column_constraint()
3297            if not constraint:
3298                break
3299            constraints.append(constraint)
3300
3301        if not kind and not constraints:
3302            return this
3303
3304        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3305
3306    def _parse_auto_increment(self) -> exp.Expression:
3307        start = None
3308        increment = None
3309
3310        if self._match(TokenType.L_PAREN, advance=False):
3311            args = self._parse_wrapped_csv(self._parse_bitwise)
3312            start = seq_get(args, 0)
3313            increment = seq_get(args, 1)
3314        elif self._match_text_seq("START"):
3315            start = self._parse_bitwise()
3316            self._match_text_seq("INCREMENT")
3317            increment = self._parse_bitwise()
3318
3319        if start and increment:
3320            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3321
3322        return exp.AutoIncrementColumnConstraint()
3323
3324    def _parse_compress(self) -> exp.Expression:
3325        if self._match(TokenType.L_PAREN, advance=False):
3326            return self.expression(
3327                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3328            )
3329
3330        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3331
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | (expr)} [(sequence options)]."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 1 MINVALUE 0 ... [NO] CYCLE)
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expression): a computed column, not an identity column.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3366
3367    def _parse_inline(self) -> t.Optional[exp.Expression]:
3368        self._match_text_seq("LENGTH")
3369        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3370
3371    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3372        if self._match_text_seq("NULL"):
3373            return self.expression(exp.NotNullColumnConstraint)
3374        if self._match_text_seq("CASESPECIFIC"):
3375            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3376        return None
3377
3378    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3379        if self._match(TokenType.CONSTRAINT):
3380            this = self._parse_id_var()
3381        else:
3382            this = None
3383
3384        if self._match_texts(self.CONSTRAINT_PARSERS):
3385            return self.expression(
3386                exp.ColumnConstraint,
3387                this=this,
3388                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3389            )
3390
3391        return this
3392
3393    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3394        if not self._match(TokenType.CONSTRAINT):
3395            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3396
3397        this = self._parse_id_var()
3398        expressions = []
3399
3400        while True:
3401            constraint = self._parse_unnamed_constraint() or self._parse_function()
3402            if not constraint:
3403                break
3404            expressions.append(constraint)
3405
3406        return self.expression(exp.Constraint, this=this, expressions=expressions)
3407
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Dispatch to the parser registered for the next constraint keyword, if any.

        Args:
            constraints: keywords to accept; defaults to all of CONSTRAINT_PARSERS.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        # A caller-supplied `constraints` set may contain keywords we can't parse.
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
3419
3420    def _parse_unique(self) -> exp.Expression:
3421        self._match_text_seq("KEY")
3422        return self.expression(
3423            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3424        )
3425
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ..., DEFERRABLE, MATCH FULL, ...) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text  # the event, e.g. DELETE/UPDATE

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: stop collecting and leave it for the caller.
                break

        return options
3462
3463    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3464        if match and not self._match(TokenType.REFERENCES):
3465            return None
3466
3467        expressions = None
3468        this = self._parse_id_var()
3469
3470        if self._match(TokenType.L_PAREN, advance=False):
3471            expressions = self._parse_wrapped_id_vars()
3472
3473        options = self._parse_key_constraint_options()
3474        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3475
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()  # "delete" or "update", used as the ForeignKey arg name

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token actions, e.g. CASCADE or RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3501
3502    def _parse_primary_key(
3503        self, wrapped_optional: bool = False, in_props: bool = False
3504    ) -> exp.Expression:
3505        desc = (
3506            self._match_set((TokenType.ASC, TokenType.DESC))
3507            and self._prev.token_type == TokenType.DESC
3508        )
3509
3510        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
3511            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3512
3513        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
3514        options = self._parse_key_constraint_options()
3515        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3516
    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse [...] / {...} after `this`: a subscript, array literal, or struct literal."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no start, e.g. x[:2].
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize subscript indices for dialects with a non-zero index base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to handle chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)
3554
3555    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3556        if self._match(TokenType.COLON):
3557            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3558        return this
3559
3560    def _parse_case(self) -> t.Optional[exp.Expression]:
3561        ifs = []
3562        default = None
3563
3564        expression = self._parse_conjunction()
3565
3566        while self._match(TokenType.WHEN):
3567            this = self._parse_conjunction()
3568            self._match(TokenType.THEN)
3569            then = self._parse_conjunction()
3570            ifs.append(self.expression(exp.If, this=this, true=then))
3571
3572        if self._match(TokenType.ELSE):
3573            default = self._parse_conjunction()
3574
3575        if not self._match(TokenType.END):
3576            self.raise_error("Expected END after CASE", self._prev)
3577
3578        return self._parse_window(
3579            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3580        )
3581
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a call IF(...) or as IF <cond> THEN <expr> [ELSE <expr>] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1  # position of the IF token itself

            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all; rewind so IF can be re-parsed elsewhere.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3603
3604    def _parse_extract(self) -> exp.Expression:
3605        this = self._parse_function() or self._parse_var() or self._parse_type()
3606
3607        if self._match(TokenType.FROM):
3608            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3609
3610        if not self._match(TokenType.COMMA):
3611            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3612
3613        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3614
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, '<type>'): the target type given as a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET, which replaces the plain type.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3635
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style calls into a GroupConcat expression."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3664
3665    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3666        to: t.Optional[exp.Expression]
3667        this = self._parse_bitwise()
3668
3669        if self._match(TokenType.USING):
3670            to = self.expression(exp.CharacterSet, this=self._parse_var())
3671        elif self._match(TokenType.COMMA):
3672            to = self._parse_bitwise()
3673        else:
3674            to = None
3675
3676        # Swap the argument order if needed to produce the correct AST
3677        if self.CONVERT_TYPE_FIRST:
3678            this, to = to, this
3679
3680        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3681
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset)
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result); a trailing unpaired argument is the default (below).
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be matched with IS NULL rather than equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Unknown operand: match by equality, or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3728
3729    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3730        self._match_text_seq("KEY")
3731        key = self._parse_field()
3732        self._match(TokenType.COLON)
3733        self._match_text_seq("VALUE")
3734        value = self._parse_field()
3735        if not key and not value:
3736            return None
3737        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3738
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with NULL-handling, uniqueness and RETURNING options."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")  # optional KEYS after WITH/WITHOUT UNIQUE

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3769
3770    def _parse_logarithm(self) -> exp.Expression:
3771        # Default argument order is base, expression
3772        args = self._parse_csv(self._parse_range)
3773
3774        if len(args) > 1:
3775            if not self.LOG_BASE_FIRST:
3776                args.reverse()
3777            return exp.Log.from_arg_list(args)
3778
3779        return self.expression(
3780            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3781        )
3782
3783    def _parse_match_against(self) -> exp.Expression:
3784        expressions = self._parse_csv(self._parse_column)
3785
3786        self._match_text_seq(")", "AGAINST", "(")
3787
3788        this = self._parse_string()
3789
3790        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3791            modifier = "IN NATURAL LANGUAGE MODE"
3792            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3793                modifier = f"{modifier} WITH QUERY EXPANSION"
3794        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3795            modifier = "IN BOOLEAN MODE"
3796        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3797            modifier = "WITH QUERY EXPANSION"
3798        else:
3799            modifier = None
3800
3801        return self.expression(
3802            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3803        )
3804
3805    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: name type [path] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The function's ")" followed by WITH introduces the column definition list.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3825
3826    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3827        args = self._parse_csv(self._parse_bitwise)
3828
3829        if self._match(TokenType.IN):
3830            return self.expression(
3831                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3832            )
3833
3834        if haystack_first:
3835            haystack = seq_get(args, 0)
3836            needle = seq_get(args, 1)
3837        else:
3838            needle = seq_get(args, 0)
3839            haystack = seq_get(args, 1)
3840
3841        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3842
3843        self.validate_expression(this, args)
3844
3845        return this
3846
3847    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3848        args = self._parse_csv(self._parse_table)
3849        return exp.JoinHint(this=func_name.upper(), expressions=args)
3850
3851    def _parse_substring(self) -> exp.Expression:
3852        # Postgres supports the form: substring(string [from int] [for int])
3853        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3854
3855        args = self._parse_csv(self._parse_bitwise)
3856
3857        if self._match(TokenType.FROM):
3858            args.append(self._parse_bitwise())
3859            if self._match(TokenType.FOR):
3860                args.append(self._parse_bitwise())
3861
3862        this = exp.Substring.from_arg_list(args)
3863        self.validate_expression(this, args)
3864
3865        return this
3866
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM expr): what we parsed first was the removal set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )
3894
    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # Parse WINDOW <named windows>; falsy when there is no WINDOW clause here.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3897
    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        # A named window (WINDOW w AS (...)) is its identifier parsed in alias mode.
        return self._parse_window(self._parse_id_var(), alias=True)
3900
3901    def _parse_respect_or_ignore_nulls(
3902        self, this: t.Optional[exp.Expression]
3903    ) -> t.Optional[exp.Expression]:
3904        if self._match_text_seq("IGNORE", "NULLS"):
3905            return self.expression(exp.IgnoreNulls, this=this)
3906        if self._match_text_seq("RESPECT", "NULLS"):
3907            return self.expression(exp.RespectNulls, this=this)
3908        return this
3909
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window suffix of a function call: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS, and an OVER clause or a named window definition.

        Args:
            this: the expression (usually a function call) the window applies to.
            alias: when True, parse a named window (``name AS (...)``) instead of
                requiring an OVER-like keyword.

        Returns:
            `this` unchanged when no window syntax follows, otherwise the wrapped
            expression (Filter / WithinGroup / Window).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # OVER name: reference to a named window rather than an inline spec.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # Window frame: ROWS/RANGE [BETWEEN] <spec> [AND <spec>]
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3992
3993    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3994        self._match(TokenType.BETWEEN)
3995
3996        return {
3997            "value": (
3998                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3999                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
4000                or self._parse_bitwise()
4001            ),
4002            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
4003        }
4004
4005    def _parse_alias(
4006        self, this: t.Optional[exp.Expression], explicit: bool = False
4007    ) -> t.Optional[exp.Expression]:
4008        any_token = self._match(TokenType.ALIAS)
4009
4010        if explicit and not any_token:
4011            return this
4012
4013        if self._match(TokenType.L_PAREN):
4014            aliases = self.expression(
4015                exp.Aliases,
4016                this=this,
4017                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
4018            )
4019            self._match_r_paren(aliases)
4020            return aliases
4021
4022        alias = self._parse_id_var(any_token)
4023
4024        if alias:
4025            return self.expression(exp.Alias, this=this, alias=alias)
4026
4027        return this
4028
4029    def _parse_id_var(
4030        self,
4031        any_token: bool = True,
4032        tokens: t.Optional[t.Collection[TokenType]] = None,
4033    ) -> t.Optional[exp.Expression]:
4034        identifier = self._parse_identifier()
4035
4036        if identifier:
4037            return identifier
4038
4039        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4040            quoted = self._prev.token_type == TokenType.STRING
4041            return exp.Identifier(this=self._prev.text, quoted=quoted)
4042
4043        return None
4044
4045    def _parse_string(self) -> t.Optional[exp.Expression]:
4046        if self._match(TokenType.STRING):
4047            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4048        return self._parse_placeholder()
4049
4050    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4051        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4052
4053    def _parse_number(self) -> t.Optional[exp.Expression]:
4054        if self._match(TokenType.NUMBER):
4055            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4056        return self._parse_placeholder()
4057
4058    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4059        if self._match(TokenType.IDENTIFIER):
4060            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4061        return self._parse_placeholder()
4062
4063    def _parse_var(
4064        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4065    ) -> t.Optional[exp.Expression]:
4066        if (
4067            (any_token and self._advance_any())
4068            or self._match(TokenType.VAR)
4069            or (self._match_set(tokens) if tokens else False)
4070        ):
4071            return self.expression(exp.Var, this=self._prev.text)
4072        return self._parse_placeholder()
4073
4074    def _advance_any(self) -> t.Optional[Token]:
4075        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4076            self._advance()
4077            return self._prev
4078        return None
4079
4080    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4081        return self._parse_var() or self._parse_string()
4082
4083    def _parse_null(self) -> t.Optional[exp.Expression]:
4084        if self._match(TokenType.NULL):
4085            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4086        return None
4087
4088    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4089        if self._match(TokenType.TRUE):
4090            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4091        if self._match(TokenType.FALSE):
4092            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4093        return None
4094
4095    def _parse_star(self) -> t.Optional[exp.Expression]:
4096        if self._match(TokenType.STAR):
4097            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4098        return None
4099
4100    def _parse_parameter(self) -> exp.Expression:
4101        wrapped = self._match(TokenType.L_BRACE)
4102        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4103        self._match(TokenType.R_BRACE)
4104        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4105
4106    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4107        if self._match_set(self.PLACEHOLDER_PARSERS):
4108            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4109            if placeholder:
4110                return placeholder
4111            self._advance(-1)
4112        return None
4113
4114    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4115        if not self._match(TokenType.EXCEPT):
4116            return None
4117        if self._match(TokenType.L_PAREN, advance=False):
4118            return self._parse_wrapped_csv(self._parse_column)
4119        return self._parse_csv(self._parse_column)
4120
4121    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4122        if not self._match(TokenType.REPLACE):
4123            return None
4124        if self._match(TokenType.L_PAREN, advance=False):
4125            return self._parse_wrapped_csv(self._parse_expression)
4126        return self._parse_csv(self._parse_expression)
4127
4128    def _parse_csv(
4129        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4130    ) -> t.List[t.Optional[exp.Expression]]:
4131        parse_result = parse_method()
4132        items = [parse_result] if parse_result is not None else []
4133
4134        while self._match(sep):
4135            self._add_comments(parse_result)
4136            parse_result = parse_method()
4137            if parse_result is not None:
4138                items.append(parse_result)
4139
4140        return items
4141
4142    def _parse_tokens(
4143        self, parse_method: t.Callable, expressions: t.Dict
4144    ) -> t.Optional[exp.Expression]:
4145        this = parse_method()
4146
4147        while self._match_set(expressions):
4148            this = self.expression(
4149                expressions[self._prev.token_type],
4150                this=this,
4151                comments=self._prev_comments,
4152                expression=parse_method(),
4153            )
4154
4155        return this
4156
4157    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4158        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4159
4160    def _parse_wrapped_csv(
4161        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4162    ) -> t.List[t.Optional[exp.Expression]]:
4163        return self._parse_wrapped(
4164            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4165        )
4166
4167    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4168        wrapped = self._match(TokenType.L_PAREN)
4169        if not wrapped and not optional:
4170            self.raise_error("Expecting (")
4171        parse_result = parse_method()
4172        if wrapped:
4173            self._match_r_paren()
4174        return parse_result
4175
4176    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4177        return self._parse_select() or self._parse_set_operations(
4178            self._parse_expression() if alias else self._parse_conjunction()
4179        )
4180
4181    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4182        return self._parse_query_modifiers(
4183            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
4184        )
4185
4186    def _parse_transaction(self) -> exp.Expression:
4187        this = None
4188        if self._match_texts(self.TRANSACTION_KIND):
4189            this = self._prev.text
4190
4191        self._match_texts({"TRANSACTION", "WORK"})
4192
4193        modes = []
4194        while True:
4195            mode = []
4196            while self._match(TokenType.VAR):
4197                mode.append(self._prev.text)
4198
4199            if mode:
4200                modes.append(" ".join(mode))
4201            if not self._match(TokenType.COMMA):
4202                break
4203
4204        return self.expression(exp.Transaction, this=this, modes=modes)
4205
4206    def _parse_commit_or_rollback(self) -> exp.Expression:
4207        chain = None
4208        savepoint = None
4209        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4210
4211        self._match_texts({"TRANSACTION", "WORK"})
4212
4213        if self._match_text_seq("TO"):
4214            self._match_text_seq("SAVEPOINT")
4215            savepoint = self._parse_id_var()
4216
4217        if self._match(TokenType.AND):
4218            chain = not self._match_text_seq("NO")
4219            self._match_text_seq("CHAIN")
4220
4221        if is_rollback:
4222            return self.expression(exp.Rollback, savepoint=savepoint)
4223        return self.expression(exp.Commit, chain=chain)
4224
4225    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4226        if not self._match_text_seq("ADD"):
4227            return None
4228
4229        self._match(TokenType.COLUMN)
4230        exists_column = self._parse_exists(not_=True)
4231        expression = self._parse_column_def(self._parse_field(any_token=True))
4232
4233        if expression:
4234            expression.set("exists", exists_column)
4235
4236            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4237            if self._match_texts(("FIRST", "AFTER")):
4238                position = self._prev.text
4239                column_position = self.expression(
4240                    exp.ColumnPosition, this=self._parse_column(), position=position
4241                )
4242                expression.set("position", column_position)
4243
4244        return expression
4245
4246    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4247        drop = self._match(TokenType.DROP) and self._parse_drop()
4248        if drop and not isinstance(drop, exp.Command):
4249            drop.set("kind", drop.args.get("kind", "COLUMN"))
4250        return drop
4251
4252    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4253    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4254        return self.expression(
4255            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4256        )
4257
4258    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
4259        this = None
4260        kind = self._prev.token_type
4261
4262        if kind == TokenType.CONSTRAINT:
4263            this = self._parse_id_var()
4264
4265            if self._match_text_seq("CHECK"):
4266                expression = self._parse_wrapped(self._parse_conjunction)
4267                enforced = self._match_text_seq("ENFORCED")
4268
4269                return self.expression(
4270                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4271                )
4272
4273        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4274            expression = self._parse_foreign_key()
4275        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4276            expression = self._parse_primary_key()
4277        else:
4278            expression = None
4279
4280        return self.expression(exp.AddConstraint, this=this, expression=expression)
4281
4282    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4283        index = self._index - 1
4284
4285        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4286            return self._parse_csv(self._parse_add_constraint)
4287
4288        self._retreat(index)
4289        return self._parse_csv(self._parse_add_column)
4290
4291    def _parse_alter_table_alter(self) -> exp.Expression:
4292        self._match(TokenType.COLUMN)
4293        column = self._parse_field(any_token=True)
4294
4295        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4296            return self.expression(exp.AlterColumn, this=column, drop=True)
4297        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4298            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4299
4300        self._match_text_seq("SET", "DATA")
4301        return self.expression(
4302            exp.AlterColumn,
4303            this=column,
4304            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4305            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4306            using=self._match(TokenType.USING) and self._parse_conjunction(),
4307        )
4308
4309    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4310        index = self._index - 1
4311
4312        partition_exists = self._parse_exists()
4313        if self._match(TokenType.PARTITION, advance=False):
4314            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4315
4316        self._retreat(index)
4317        return self._parse_csv(self._parse_drop_column)
4318
4319    def _parse_alter_table_rename(self) -> exp.Expression:
4320        self._match_text_seq("TO")
4321        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4322
4323    def _parse_alter(self) -> t.Optional[exp.Expression]:
4324        start = self._prev
4325
4326        if not self._match(TokenType.TABLE):
4327            return self._parse_as_command(start)
4328
4329        exists = self._parse_exists()
4330        this = self._parse_table(schema=True)
4331
4332        if self._next:
4333            self._advance()
4334        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4335
4336        if parser:
4337            actions = ensure_list(parser(self))
4338
4339            if not self._curr:
4340                return self.expression(
4341                    exp.AlterTable,
4342                    this=this,
4343                    exists=exists,
4344                    actions=actions,
4345                )
4346        return self._parse_as_command(start)
4347
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: target table, USING source, ON condition and a
        list of ``WHEN [NOT] MATCHED [BY SOURCE|TARGET] ... THEN <action>`` clauses.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True only for BY SOURCE; False for BY TARGET or when neither appears.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * (star form) vs INSERT (cols) VALUES (...)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * (star form) vs UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4413
4414    def _parse_show(self) -> t.Optional[exp.Expression]:
4415        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4416        if parser:
4417            return parser(self)
4418        self._advance()
4419        return self.expression(exp.Show, this=self._prev.text.upper())
4420
4421    def _parse_set_item_assignment(
4422        self, kind: t.Optional[str] = None
4423    ) -> t.Optional[exp.Expression]:
4424        index = self._index
4425
4426        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4427            return self._parse_set_transaction(global_=kind == "GLOBAL")
4428
4429        left = self._parse_primary() or self._parse_id_var()
4430
4431        if not self._match_texts(("=", "TO")):
4432            self._retreat(index)
4433            return None
4434
4435        right = self._parse_statement() or self._parse_id_var()
4436        this = self.expression(
4437            exp.EQ,
4438            this=left,
4439            expression=right,
4440        )
4441
4442        return self.expression(
4443            exp.SetItem,
4444            this=this,
4445            kind=kind,
4446        )
4447
4448    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4449        self._match_text_seq("TRANSACTION")
4450        characteristics = self._parse_csv(
4451            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4452        )
4453        return self.expression(
4454            exp.SetItem,
4455            expressions=characteristics,
4456            kind="TRANSACTION",
4457            **{"global": global_},  # type: ignore
4458        )
4459
4460    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4461        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4462        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4463
4464    def _parse_set(self) -> exp.Expression:
4465        index = self._index
4466        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4467
4468        if self._curr:
4469            self._retreat(index)
4470            return self._parse_as_command(self._prev)
4471
4472        return set_
4473
4474    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4475        for option in options:
4476            if self._match_text_seq(*option.split(" ")):
4477                return exp.Var(this=option)
4478        return None
4479
4480    def _parse_as_command(self, start: Token) -> exp.Command:
4481        while self._curr:
4482            self._advance()
4483        text = self._find_sql(start, self._prev)
4484        size = len(start.text)
4485        return exp.Command(this=text[:size], expression=text[size:])
4486
4487    def _parse_dict_property(self, this: str) -> exp.DictProperty:
4488        settings = []
4489
4490        self._match_l_paren()
4491        kind = self._parse_id_var()
4492
4493        if self._match(TokenType.L_PAREN):
4494            while True:
4495                key = self._parse_id_var()
4496                value = self._parse_primary()
4497
4498                if not key and value is None:
4499                    break
4500                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
4501            self._match(TokenType.R_PAREN)
4502
4503        self._match_r_paren()
4504
4505        return self.expression(
4506            exp.DictProperty,
4507            this=this,
4508            kind=kind.this if kind else None,
4509            settings=settings,
4510        )
4511
4512    def _parse_dict_range(self, this: str) -> exp.DictRange:
4513        self._match_l_paren()
4514        has_min = self._match_text_seq("MIN")
4515        if has_min:
4516            min = self._parse_var() or self._parse_primary()
4517            self._match_text_seq("MAX")
4518            max = self._parse_var() or self._parse_primary()
4519        else:
4520            max = self._parse_var() or self._parse_primary()
4521            min = exp.Literal.number(0)
4522        self._match_r_paren()
4523        return self.expression(exp.DictRange, this=this, min=min, max=max)
4524
4525    def _find_parser(
4526        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4527    ) -> t.Optional[t.Callable]:
4528        if not self._curr:
4529            return None
4530
4531        index = self._index
4532        this = []
4533        while True:
4534            # The current token might be multiple words
4535            curr = self._curr.text.upper()
4536            key = curr.split(" ")
4537            this.append(curr)
4538            self._advance()
4539            result, trie = in_trie(trie, key)
4540            if result == 0:
4541                break
4542            if result == 2:
4543                subparser = parsers[" ".join(this)]
4544                return subparser
4545        self._retreat(index)
4546        return None
4547
4548    def _match(self, token_type, advance=True, expression=None):
4549        if not self._curr:
4550            return None
4551
4552        if self._curr.token_type == token_type:
4553            if advance:
4554                self._advance()
4555            self._add_comments(expression)
4556            return True
4557
4558        return None
4559
4560    def _match_set(self, types, advance=True):
4561        if not self._curr:
4562            return None
4563
4564        if self._curr.token_type in types:
4565            if advance:
4566                self._advance()
4567            return True
4568
4569        return None
4570
4571    def _match_pair(self, token_type_a, token_type_b, advance=True):
4572        if not self._curr or not self._next:
4573            return None
4574
4575        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4576            if advance:
4577                self._advance(2)
4578            return True
4579
4580        return None
4581
4582    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4583        if not self._match(TokenType.L_PAREN, expression=expression):
4584            self.raise_error("Expecting (")
4585
4586    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4587        if not self._match(TokenType.R_PAREN, expression=expression):
4588            self.raise_error("Expecting )")
4589
4590    def _match_texts(self, texts, advance=True):
4591        if self._curr and self._curr.text.upper() in texts:
4592            if advance:
4593                self._advance()
4594            return True
4595        return False
4596
4597    def _match_text_seq(self, *texts, advance=True):
4598        index = self._index
4599        for text in texts:
4600            if self._curr and self._curr.text.upper() == text:
4601                self._advance()
4602            else:
4603                self._retreat(index)
4604                return False
4605
4606        if not advance:
4607            self._retreat(index)
4608
4609        return True
4610
4611    @t.overload
4612    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
4613        ...
4614
4615    @t.overload
4616    def _replace_columns_with_dots(
4617        self, this: t.Optional[exp.Expression]
4618    ) -> t.Optional[exp.Expression]:
4619        ...
4620
4621    def _replace_columns_with_dots(self, this):
4622        if isinstance(this, exp.Dot):
4623            exp.replace_children(this, self._replace_columns_with_dots)
4624        elif isinstance(this, exp.Column):
4625            exp.replace_children(this, self._replace_columns_with_dots)
4626            table = this.args.get("table")
4627            this = (
4628                self.expression(exp.Dot, this=table, expression=this.this)
4629                if table
4630                else self.expression(exp.Var, this=this.name)
4631            )
4632        elif isinstance(this, exp.Identifier):
4633            this = self.expression(exp.Var, this=this.name)
4634
4635        return this
4636
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references to lambda parameters inside `node` so they are
        treated as plain identifiers/dot-chains rather than table columns.

        Args:
            node: the parsed lambda body (may be None).
            lambda_variables: the names of the lambda's parameters.

        Returns:
            The node, possibly replaced at the root when it itself is a matching column.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot that contains this column and replace it.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not inside a Dot chain: replace the column node itself.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: List) -> sqlglot.expressions.StarMap | sqlglot.expressions.VarMap:
20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
21    if len(args) == 1 and args[0].is_star:
22        return exp.StarMap(this=args[0])
23
24    keys = []
25    values = []
26    for i in range(0, len(args), 2):
27        keys.append(args[i])
28        values.append(args[i + 1])
29    return exp.VarMap(
30        keys=exp.Array(expressions=keys),
31        values=exp.Array(expressions=values),
32    )
def parse_like(args: List) -> sqlglot.expressions.Expression:
35def parse_like(args: t.List) -> exp.Expression:
36    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
37    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
40def binary_range_parser(
41    expr_type: t.Type[exp.Expression],
42) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
43    return lambda self, this: self._parse_escape(
44        self.expression(expr_type, this=this, expression=self._parse_bitwise())
45    )
class Parser:
  57class Parser(metaclass=_Parser):
  58    """
  59    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  60    a parsed syntax tree.
  61
  62    Args:
  63        error_level: the desired error level.
  64            Default: ErrorLevel.IMMEDIATE
  65        error_message_context: determines the amount of context to capture from a
  66            query string when displaying the error message (in number of characters).
  67            Default: 50.
  68        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  69            Default: 0
  70        alias_post_tablesample: If the table alias comes after tablesample.
  71            Default: False
  72        max_errors: Maximum number of error messages to include in a raised ParseError.
  73            This is only relevant if error_level is ErrorLevel.RAISE.
  74            Default: 3
  75        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  76            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  77            Default: "nulls_are_small"
  78    """
  79
  80    FUNCTIONS: t.Dict[str, t.Callable] = {
  81        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  82        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  83            this=seq_get(args, 0),
  84            to=exp.DataType(this=exp.DataType.Type.TEXT),
  85        ),
  86        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  87        "IFNULL": exp.Coalesce.from_arg_list,
  88        "LIKE": parse_like,
  89        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  90            this=seq_get(args, 0),
  91            to=exp.DataType(this=exp.DataType.Type.TEXT),
  92        ),
  93        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  94            this=exp.Cast(
  95                this=seq_get(args, 0),
  96                to=exp.DataType(this=exp.DataType.Type.TEXT),
  97            ),
  98            start=exp.Literal.number(1),
  99            length=exp.Literal.number(10),
 100        ),
 101        "VAR_MAP": parse_var_map,
 102    }
 103
 104    NO_PAREN_FUNCTIONS = {
 105        TokenType.CURRENT_DATE: exp.CurrentDate,
 106        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 107        TokenType.CURRENT_TIME: exp.CurrentTime,
 108        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 109        TokenType.CURRENT_USER: exp.CurrentUser,
 110    }
 111
 112    JOIN_HINTS: t.Set[str] = set()
 113
 114    NESTED_TYPE_TOKENS = {
 115        TokenType.ARRAY,
 116        TokenType.MAP,
 117        TokenType.NULLABLE,
 118        TokenType.STRUCT,
 119    }
 120
 121    TYPE_TOKENS = {
 122        TokenType.BIT,
 123        TokenType.BOOLEAN,
 124        TokenType.TINYINT,
 125        TokenType.UTINYINT,
 126        TokenType.SMALLINT,
 127        TokenType.USMALLINT,
 128        TokenType.INT,
 129        TokenType.UINT,
 130        TokenType.BIGINT,
 131        TokenType.UBIGINT,
 132        TokenType.INT128,
 133        TokenType.UINT128,
 134        TokenType.INT256,
 135        TokenType.UINT256,
 136        TokenType.FLOAT,
 137        TokenType.DOUBLE,
 138        TokenType.CHAR,
 139        TokenType.NCHAR,
 140        TokenType.VARCHAR,
 141        TokenType.NVARCHAR,
 142        TokenType.TEXT,
 143        TokenType.MEDIUMTEXT,
 144        TokenType.LONGTEXT,
 145        TokenType.MEDIUMBLOB,
 146        TokenType.LONGBLOB,
 147        TokenType.BINARY,
 148        TokenType.VARBINARY,
 149        TokenType.JSON,
 150        TokenType.JSONB,
 151        TokenType.INTERVAL,
 152        TokenType.TIME,
 153        TokenType.TIMESTAMP,
 154        TokenType.TIMESTAMPTZ,
 155        TokenType.TIMESTAMPLTZ,
 156        TokenType.DATETIME,
 157        TokenType.DATETIME64,
 158        TokenType.DATE,
 159        TokenType.INT4RANGE,
 160        TokenType.INT4MULTIRANGE,
 161        TokenType.INT8RANGE,
 162        TokenType.INT8MULTIRANGE,
 163        TokenType.NUMRANGE,
 164        TokenType.NUMMULTIRANGE,
 165        TokenType.TSRANGE,
 166        TokenType.TSMULTIRANGE,
 167        TokenType.TSTZRANGE,
 168        TokenType.TSTZMULTIRANGE,
 169        TokenType.DATERANGE,
 170        TokenType.DATEMULTIRANGE,
 171        TokenType.DECIMAL,
 172        TokenType.BIGDECIMAL,
 173        TokenType.UUID,
 174        TokenType.GEOGRAPHY,
 175        TokenType.GEOMETRY,
 176        TokenType.HLLSKETCH,
 177        TokenType.HSTORE,
 178        TokenType.PSEUDO_TYPE,
 179        TokenType.SUPER,
 180        TokenType.SERIAL,
 181        TokenType.SMALLSERIAL,
 182        TokenType.BIGSERIAL,
 183        TokenType.XML,
 184        TokenType.UNIQUEIDENTIFIER,
 185        TokenType.MONEY,
 186        TokenType.SMALLMONEY,
 187        TokenType.ROWVERSION,
 188        TokenType.IMAGE,
 189        TokenType.VARIANT,
 190        TokenType.OBJECT,
 191        TokenType.INET,
 192        *NESTED_TYPE_TOKENS,
 193    }
 194
 195    SUBQUERY_PREDICATES = {
 196        TokenType.ANY: exp.Any,
 197        TokenType.ALL: exp.All,
 198        TokenType.EXISTS: exp.Exists,
 199        TokenType.SOME: exp.Any,
 200    }
 201
 202    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 203
 204    DB_CREATABLES = {
 205        TokenType.DATABASE,
 206        TokenType.SCHEMA,
 207        TokenType.TABLE,
 208        TokenType.VIEW,
 209        TokenType.DICTIONARY,
 210    }
 211
 212    CREATABLES = {
 213        TokenType.COLUMN,
 214        TokenType.FUNCTION,
 215        TokenType.INDEX,
 216        TokenType.PROCEDURE,
 217        *DB_CREATABLES,
 218    }
 219
 220    ID_VAR_TOKENS = {
 221        TokenType.VAR,
 222        TokenType.ANTI,
 223        TokenType.APPLY,
 224        TokenType.ASC,
 225        TokenType.AUTO_INCREMENT,
 226        TokenType.BEGIN,
 227        TokenType.CACHE,
 228        TokenType.COLLATE,
 229        TokenType.COMMAND,
 230        TokenType.COMMENT,
 231        TokenType.COMMIT,
 232        TokenType.CONSTRAINT,
 233        TokenType.DEFAULT,
 234        TokenType.DELETE,
 235        TokenType.DESC,
 236        TokenType.DESCRIBE,
 237        TokenType.DICTIONARY,
 238        TokenType.DIV,
 239        TokenType.END,
 240        TokenType.EXECUTE,
 241        TokenType.ESCAPE,
 242        TokenType.FALSE,
 243        TokenType.FIRST,
 244        TokenType.FILTER,
 245        TokenType.FORMAT,
 246        TokenType.FULL,
 247        TokenType.IF,
 248        TokenType.IS,
 249        TokenType.ISNULL,
 250        TokenType.INTERVAL,
 251        TokenType.KEEP,
 252        TokenType.LEFT,
 253        TokenType.LOAD,
 254        TokenType.MERGE,
 255        TokenType.NATURAL,
 256        TokenType.NEXT,
 257        TokenType.OFFSET,
 258        TokenType.ORDINALITY,
 259        TokenType.OVERWRITE,
 260        TokenType.PARTITION,
 261        TokenType.PERCENT,
 262        TokenType.PIVOT,
 263        TokenType.PRAGMA,
 264        TokenType.RANGE,
 265        TokenType.REFERENCES,
 266        TokenType.RIGHT,
 267        TokenType.ROW,
 268        TokenType.ROWS,
 269        TokenType.SEMI,
 270        TokenType.SET,
 271        TokenType.SETTINGS,
 272        TokenType.SHOW,
 273        TokenType.TEMPORARY,
 274        TokenType.TOP,
 275        TokenType.TRUE,
 276        TokenType.UNIQUE,
 277        TokenType.UNPIVOT,
 278        TokenType.VOLATILE,
 279        TokenType.WINDOW,
 280        *CREATABLES,
 281        *SUBQUERY_PREDICATES,
 282        *TYPE_TOKENS,
 283        *NO_PAREN_FUNCTIONS,
 284    }
 285
 286    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 287
 288    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 289        TokenType.APPLY,
 290        TokenType.ASOF,
 291        TokenType.FULL,
 292        TokenType.LEFT,
 293        TokenType.LOCK,
 294        TokenType.NATURAL,
 295        TokenType.OFFSET,
 296        TokenType.RIGHT,
 297        TokenType.WINDOW,
 298    }
 299
 300    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 301
 302    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 303
 304    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 305
 306    FUNC_TOKENS = {
 307        TokenType.COMMAND,
 308        TokenType.CURRENT_DATE,
 309        TokenType.CURRENT_DATETIME,
 310        TokenType.CURRENT_TIMESTAMP,
 311        TokenType.CURRENT_TIME,
 312        TokenType.CURRENT_USER,
 313        TokenType.FILTER,
 314        TokenType.FIRST,
 315        TokenType.FORMAT,
 316        TokenType.GLOB,
 317        TokenType.IDENTIFIER,
 318        TokenType.INDEX,
 319        TokenType.ISNULL,
 320        TokenType.ILIKE,
 321        TokenType.LIKE,
 322        TokenType.MERGE,
 323        TokenType.OFFSET,
 324        TokenType.PRIMARY_KEY,
 325        TokenType.RANGE,
 326        TokenType.REPLACE,
 327        TokenType.ROW,
 328        TokenType.UNNEST,
 329        TokenType.VAR,
 330        TokenType.LEFT,
 331        TokenType.RIGHT,
 332        TokenType.DATE,
 333        TokenType.DATETIME,
 334        TokenType.TABLE,
 335        TokenType.TIMESTAMP,
 336        TokenType.TIMESTAMPTZ,
 337        TokenType.WINDOW,
 338        *TYPE_TOKENS,
 339        *SUBQUERY_PREDICATES,
 340    }
 341
 342    CONJUNCTION = {
 343        TokenType.AND: exp.And,
 344        TokenType.OR: exp.Or,
 345    }
 346
 347    EQUALITY = {
 348        TokenType.EQ: exp.EQ,
 349        TokenType.NEQ: exp.NEQ,
 350        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 351    }
 352
 353    COMPARISON = {
 354        TokenType.GT: exp.GT,
 355        TokenType.GTE: exp.GTE,
 356        TokenType.LT: exp.LT,
 357        TokenType.LTE: exp.LTE,
 358    }
 359
 360    BITWISE = {
 361        TokenType.AMP: exp.BitwiseAnd,
 362        TokenType.CARET: exp.BitwiseXor,
 363        TokenType.PIPE: exp.BitwiseOr,
 364        TokenType.DPIPE: exp.DPipe,
 365    }
 366
 367    TERM = {
 368        TokenType.DASH: exp.Sub,
 369        TokenType.PLUS: exp.Add,
 370        TokenType.MOD: exp.Mod,
 371        TokenType.COLLATE: exp.Collate,
 372    }
 373
 374    FACTOR = {
 375        TokenType.DIV: exp.IntDiv,
 376        TokenType.LR_ARROW: exp.Distance,
 377        TokenType.SLASH: exp.Div,
 378        TokenType.STAR: exp.Mul,
 379    }
 380
 381    TIMESTAMPS = {
 382        TokenType.TIME,
 383        TokenType.TIMESTAMP,
 384        TokenType.TIMESTAMPTZ,
 385        TokenType.TIMESTAMPLTZ,
 386    }
 387
 388    SET_OPERATIONS = {
 389        TokenType.UNION,
 390        TokenType.INTERSECT,
 391        TokenType.EXCEPT,
 392    }
 393
 394    JOIN_METHODS = {
 395        TokenType.NATURAL,
 396        TokenType.ASOF,
 397    }
 398
 399    JOIN_SIDES = {
 400        TokenType.LEFT,
 401        TokenType.RIGHT,
 402        TokenType.FULL,
 403    }
 404
 405    JOIN_KINDS = {
 406        TokenType.INNER,
 407        TokenType.OUTER,
 408        TokenType.CROSS,
 409        TokenType.SEMI,
 410        TokenType.ANTI,
 411    }
 412
 413    LAMBDAS = {
 414        TokenType.ARROW: lambda self, expressions: self.expression(
 415            exp.Lambda,
 416            this=self._replace_lambda(
 417                self._parse_conjunction(),
 418                {node.name for node in expressions},
 419            ),
 420            expressions=expressions,
 421        ),
 422        TokenType.FARROW: lambda self, expressions: self.expression(
 423            exp.Kwarg,
 424            this=exp.Var(this=expressions[0].name),
 425            expression=self._parse_conjunction(),
 426        ),
 427    }
 428
 429    COLUMN_OPERATORS = {
 430        TokenType.DOT: None,
 431        TokenType.DCOLON: lambda self, this, to: self.expression(
 432            exp.Cast if self.STRICT_CAST else exp.TryCast,
 433            this=this,
 434            to=to,
 435        ),
 436        TokenType.ARROW: lambda self, this, path: self.expression(
 437            exp.JSONExtract,
 438            this=this,
 439            expression=path,
 440        ),
 441        TokenType.DARROW: lambda self, this, path: self.expression(
 442            exp.JSONExtractScalar,
 443            this=this,
 444            expression=path,
 445        ),
 446        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 447            exp.JSONBExtract,
 448            this=this,
 449            expression=path,
 450        ),
 451        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 452            exp.JSONBExtractScalar,
 453            this=this,
 454            expression=path,
 455        ),
 456        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 457            exp.JSONBContains,
 458            this=this,
 459            expression=key,
 460        ),
 461    }
 462
 463    EXPRESSION_PARSERS = {
 464        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
 465        exp.Column: lambda self: self._parse_column(),
 466        exp.Condition: lambda self: self._parse_conjunction(),
 467        exp.DataType: lambda self: self._parse_types(),
 468        exp.Expression: lambda self: self._parse_statement(),
 469        exp.From: lambda self: self._parse_from(),
 470        exp.Group: lambda self: self._parse_group(),
 471        exp.Having: lambda self: self._parse_having(),
 472        exp.Identifier: lambda self: self._parse_id_var(),
 473        exp.Join: lambda self: self._parse_join(),
 474        exp.Lambda: lambda self: self._parse_lambda(),
 475        exp.Lateral: lambda self: self._parse_lateral(),
 476        exp.Limit: lambda self: self._parse_limit(),
 477        exp.Offset: lambda self: self._parse_offset(),
 478        exp.Order: lambda self: self._parse_order(),
 479        exp.Ordered: lambda self: self._parse_ordered(),
 480        exp.Properties: lambda self: self._parse_properties(),
 481        exp.Qualify: lambda self: self._parse_qualify(),
 482        exp.Returning: lambda self: self._parse_returning(),
 483        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
 484        exp.Table: lambda self: self._parse_table_parts(),
 485        exp.TableAlias: lambda self: self._parse_table_alias(),
 486        exp.Where: lambda self: self._parse_where(),
 487        exp.Window: lambda self: self._parse_named_window(),
 488        exp.With: lambda self: self._parse_with(),
 489        "JOIN_TYPE": lambda self: self._parse_join_parts(),
 490    }
 491
 492    STATEMENT_PARSERS = {
 493        TokenType.ALTER: lambda self: self._parse_alter(),
 494        TokenType.BEGIN: lambda self: self._parse_transaction(),
 495        TokenType.CACHE: lambda self: self._parse_cache(),
 496        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 497        TokenType.COMMENT: lambda self: self._parse_comment(),
 498        TokenType.CREATE: lambda self: self._parse_create(),
 499        TokenType.DELETE: lambda self: self._parse_delete(),
 500        TokenType.DESC: lambda self: self._parse_describe(),
 501        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 502        TokenType.DROP: lambda self: self._parse_drop(),
 503        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 504        TokenType.FROM: lambda self: exp.select("*").from_(
 505            t.cast(exp.From, self._parse_from(skip_from_token=True))
 506        ),
 507        TokenType.INSERT: lambda self: self._parse_insert(),
 508        TokenType.LOAD: lambda self: self._parse_load(),
 509        TokenType.MERGE: lambda self: self._parse_merge(),
 510        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 511        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 512        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 513        TokenType.SET: lambda self: self._parse_set(),
 514        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 515        TokenType.UPDATE: lambda self: self._parse_update(),
 516        TokenType.USE: lambda self: self.expression(
 517            exp.Use,
 518            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 519            and exp.Var(this=self._prev.text),
 520            this=self._parse_table(schema=False),
 521        ),
 522    }
 523
 524    UNARY_PARSERS = {
 525        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 526        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 527        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 528        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 529    }
 530
 531    PRIMARY_PARSERS = {
 532        TokenType.STRING: lambda self, token: self.expression(
 533            exp.Literal, this=token.text, is_string=True
 534        ),
 535        TokenType.NUMBER: lambda self, token: self.expression(
 536            exp.Literal, this=token.text, is_string=False
 537        ),
 538        TokenType.STAR: lambda self, _: self.expression(
 539            exp.Star,
 540            **{"except": self._parse_except(), "replace": self._parse_replace()},
 541        ),
 542        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 543        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 544        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 545        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 546        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 547        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 548        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 549        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 550            exp.National, this=token.text
 551        ),
 552        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
 553        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 554    }
 555
 556    PLACEHOLDER_PARSERS = {
 557        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 558        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 559        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 560        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 561        else None,
 562    }
 563
 564    RANGE_PARSERS = {
 565        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 566        TokenType.GLOB: binary_range_parser(exp.Glob),
 567        TokenType.ILIKE: binary_range_parser(exp.ILike),
 568        TokenType.IN: lambda self, this: self._parse_in(this),
 569        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 570        TokenType.IS: lambda self, this: self._parse_is(this),
 571        TokenType.LIKE: binary_range_parser(exp.Like),
 572        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 573        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 574        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 575    }
 576
 577    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
 578        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 579        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 580        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 581        "CHARACTER SET": lambda self: self._parse_character_set(),
 582        "CHECKSUM": lambda self: self._parse_checksum(),
 583        "CLUSTER": lambda self: self._parse_cluster(),
 584        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 585        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 586        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 587        "DEFINER": lambda self: self._parse_definer(),
 588        "DETERMINISTIC": lambda self: self.expression(
 589            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 590        ),
 591        "DISTKEY": lambda self: self._parse_distkey(),
 592        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 593        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 594        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 595        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 596        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 597        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 598        "FREESPACE": lambda self: self._parse_freespace(),
 599        "IMMUTABLE": lambda self: self.expression(
 600            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 601        ),
 602        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 603        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 604        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
 605        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
 606        "LIKE": lambda self: self._parse_create_like(),
 607        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 608        "LOCK": lambda self: self._parse_locking(),
 609        "LOCKING": lambda self: self._parse_locking(),
 610        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 611        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 612        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 613        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 614        "NO": lambda self: self._parse_no_property(),
 615        "ON": lambda self: self._parse_on_property(),
 616        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 617        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 618        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 619        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 620        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
 621        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
 622        "RETURNS": lambda self: self._parse_returns(),
 623        "ROW": lambda self: self._parse_row(),
 624        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 625        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 626        "SETTINGS": lambda self: self.expression(
 627            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 628        ),
 629        "SORTKEY": lambda self: self._parse_sortkey(),
 630        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
 631        "STABLE": lambda self: self.expression(
 632            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 633        ),
 634        "STORED": lambda self: self._parse_stored(),
 635        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 636        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
 637        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
 638        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 639        "TTL": lambda self: self._parse_ttl(),
 640        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 641        "VOLATILE": lambda self: self._parse_volatile_property(),
 642        "WITH": lambda self: self._parse_with_property(),
 643    }
 644
 645    CONSTRAINT_PARSERS = {
 646        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 647        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 648        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 649        "CHARACTER SET": lambda self: self.expression(
 650            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 651        ),
 652        "CHECK": lambda self: self.expression(
 653            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 654        ),
 655        "COLLATE": lambda self: self.expression(
 656            exp.CollateColumnConstraint, this=self._parse_var()
 657        ),
 658        "COMMENT": lambda self: self.expression(
 659            exp.CommentColumnConstraint, this=self._parse_string()
 660        ),
 661        "COMPRESS": lambda self: self._parse_compress(),
 662        "DEFAULT": lambda self: self.expression(
 663            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 664        ),
 665        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 666        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 667        "FORMAT": lambda self: self.expression(
 668            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 669        ),
 670        "GENERATED": lambda self: self._parse_generated_as_identity(),
 671        "IDENTITY": lambda self: self._parse_auto_increment(),
 672        "INLINE": lambda self: self._parse_inline(),
 673        "LIKE": lambda self: self._parse_create_like(),
 674        "NOT": lambda self: self._parse_not_constraint(),
 675        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 676        "ON": lambda self: self._match(TokenType.UPDATE)
 677        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 678        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 679        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 680        "REFERENCES": lambda self: self._parse_references(match=False),
 681        "TITLE": lambda self: self.expression(
 682            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 683        ),
 684        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 685        "UNIQUE": lambda self: self._parse_unique(),
 686        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 687    }
 688
 689    ALTER_PARSERS = {
 690        "ADD": lambda self: self._parse_alter_table_add(),
 691        "ALTER": lambda self: self._parse_alter_table_alter(),
 692        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 693        "DROP": lambda self: self._parse_alter_table_drop(),
 694        "RENAME": lambda self: self._parse_alter_table_rename(),
 695    }
 696
 697    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 698
 699    NO_PAREN_FUNCTION_PARSERS = {
 700        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 701        TokenType.CASE: lambda self: self._parse_case(),
 702        TokenType.IF: lambda self: self._parse_if(),
 703        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 704            exp.NextValueFor,
 705            this=self._parse_column(),
 706            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 707        ),
 708    }
 709
 710    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}
 711
 712    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 713        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 714        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 715        "DECODE": lambda self: self._parse_decode(),
 716        "EXTRACT": lambda self: self._parse_extract(),
 717        "JSON_OBJECT": lambda self: self._parse_json_object(),
 718        "LOG": lambda self: self._parse_logarithm(),
 719        "MATCH": lambda self: self._parse_match_against(),
 720        "OPENJSON": lambda self: self._parse_open_json(),
 721        "POSITION": lambda self: self._parse_position(),
 722        "SAFE_CAST": lambda self: self._parse_cast(False),
 723        "STRING_AGG": lambda self: self._parse_string_agg(),
 724        "SUBSTRING": lambda self: self._parse_substring(),
 725        "TRIM": lambda self: self._parse_trim(),
 726        "TRY_CAST": lambda self: self._parse_cast(False),
 727        "TRY_CONVERT": lambda self: self._parse_convert(False),
 728    }
 729
 730    QUERY_MODIFIER_PARSERS = {
 731        "joins": lambda self: list(iter(self._parse_join, None)),
 732        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 733        "match": lambda self: self._parse_match_recognize(),
 734        "where": lambda self: self._parse_where(),
 735        "group": lambda self: self._parse_group(),
 736        "having": lambda self: self._parse_having(),
 737        "qualify": lambda self: self._parse_qualify(),
 738        "windows": lambda self: self._parse_window_clause(),
 739        "order": lambda self: self._parse_order(),
 740        "limit": lambda self: self._parse_limit(),
 741        "offset": lambda self: self._parse_offset(),
 742        "locks": lambda self: self._parse_locks(),
 743        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 744    }
 745
 746    SET_PARSERS = {
 747        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 748        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 749        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 750        "TRANSACTION": lambda self: self._parse_set_transaction(),
 751    }
 752
 753    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 754
 755    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 756
 757    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 758
 759    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 760
 761    TRANSACTION_CHARACTERISTICS = {
 762        "ISOLATION LEVEL REPEATABLE READ",
 763        "ISOLATION LEVEL READ COMMITTED",
 764        "ISOLATION LEVEL READ UNCOMMITTED",
 765        "ISOLATION LEVEL SERIALIZABLE",
 766        "READ WRITE",
 767        "READ ONLY",
 768    }
 769
 770    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 771
 772    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 773
 774    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 775    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 776    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}
 777
 778    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 779
 780    STRICT_CAST = True
 781
 782    CONVERT_TYPE_FIRST = False
 783
 784    PREFIXED_PIVOT_COLUMNS = False
 785    IDENTIFY_PIVOT_STRINGS = False
 786
 787    LOG_BASE_FIRST = True
 788    LOG_DEFAULTS_TO_LN = False
 789
 790    __slots__ = (
 791        "error_level",
 792        "error_message_context",
 793        "sql",
 794        "errors",
 795        "index_offset",
 796        "unnest_column_only",
 797        "alias_post_tablesample",
 798        "max_errors",
 799        "null_ordering",
 800        "_tokens",
 801        "_index",
 802        "_curr",
 803        "_next",
 804        "_prev",
 805        "_prev_comments",
 806        "_show_trie",
 807        "_set_trie",
 808    )
 809
 810    def __init__(
 811        self,
 812        error_level: t.Optional[ErrorLevel] = None,
 813        error_message_context: int = 100,
 814        index_offset: int = 0,
 815        unnest_column_only: bool = False,
 816        alias_post_tablesample: bool = False,
 817        max_errors: int = 3,
 818        null_ordering: t.Optional[str] = None,
 819    ):
 820        self.error_level = error_level or ErrorLevel.IMMEDIATE
 821        self.error_message_context = error_message_context
 822        self.index_offset = index_offset
 823        self.unnest_column_only = unnest_column_only
 824        self.alias_post_tablesample = alias_post_tablesample
 825        self.max_errors = max_errors
 826        self.null_ordering = null_ordering
 827        self.reset()
 828
 829    def reset(self):
 830        self.sql = ""
 831        self.errors = []
 832        self._tokens = []
 833        self._index = 0
 834        self._curr = None
 835        self._next = None
 836        self._prev = None
 837        self._prev_comments = None
 838
 839    def parse(
 840        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 841    ) -> t.List[t.Optional[exp.Expression]]:
 842        """
 843        Parses a list of tokens and returns a list of syntax trees, one tree
 844        per parsed SQL statement.
 845
 846        Args:
 847            raw_tokens: the list of tokens.
 848            sql: the original SQL string, used to produce helpful debug messages.
 849
 850        Returns:
 851            The list of syntax trees.
 852        """
 853        return self._parse(
 854            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 855        )
 856
 857    def parse_into(
 858        self,
 859        expression_types: exp.IntoType,
 860        raw_tokens: t.List[Token],
 861        sql: t.Optional[str] = None,
 862    ) -> t.List[t.Optional[exp.Expression]]:
 863        """
 864        Parses a list of tokens into a given Expression type. If a collection of Expression
 865        types is given instead, this method will try to parse the token list into each one
 866        of them, stopping at the first for which the parsing succeeds.
 867
 868        Args:
 869            expression_types: the expression type(s) to try and parse the token list into.
 870            raw_tokens: the list of tokens.
 871            sql: the original SQL string, used to produce helpful debug messages.
 872
 873        Returns:
 874            The target Expression.
 875        """
 876        errors = []
 877        for expression_type in ensure_collection(expression_types):
 878            parser = self.EXPRESSION_PARSERS.get(expression_type)
 879            if not parser:
 880                raise TypeError(f"No parser registered for {expression_type}")
 881            try:
 882                return self._parse(parser, raw_tokens, sql)
 883            except ParseError as e:
 884                e.errors[0]["into_expression"] = expression_type
 885                errors.append(e)
 886        raise ParseError(
 887            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 888            errors=merge_errors(errors),
 889        ) from errors[-1]
 890
 891    def _parse(
 892        self,
 893        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 894        raw_tokens: t.List[Token],
 895        sql: t.Optional[str] = None,
 896    ) -> t.List[t.Optional[exp.Expression]]:
 897        self.reset()
 898        self.sql = sql or ""
 899        total = len(raw_tokens)
 900        chunks: t.List[t.List[Token]] = [[]]
 901
 902        for i, token in enumerate(raw_tokens):
 903            if token.token_type == TokenType.SEMICOLON:
 904                if i < total - 1:
 905                    chunks.append([])
 906            else:
 907                chunks[-1].append(token)
 908
 909        expressions = []
 910
 911        for tokens in chunks:
 912            self._index = -1
 913            self._tokens = tokens
 914            self._advance()
 915
 916            expressions.append(parse_method(self))
 917
 918            if self._index < len(self._tokens):
 919                self.raise_error("Invalid expression / Unexpected token")
 920
 921            self.check_errors()
 922
 923        return expressions
 924
 925    def check_errors(self) -> None:
 926        """
 927        Logs or raises any found errors, depending on the chosen error level setting.
 928        """
 929        if self.error_level == ErrorLevel.WARN:
 930            for error in self.errors:
 931                logger.error(str(error))
 932        elif self.error_level == ErrorLevel.RAISE and self.errors:
 933            raise ParseError(
 934                concat_messages(self.errors, self.max_errors),
 935                errors=merge_errors(self.errors),
 936            )
 937
 938    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 939        """
 940        Appends an error in the list of recorded errors or raises it, depending on the chosen
 941        error level setting.
 942        """
 943        token = token or self._curr or self._prev or Token.string("")
 944        start = token.start
 945        end = token.end + 1
 946        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 947        highlight = self.sql[start:end]
 948        end_context = self.sql[end : end + self.error_message_context]
 949
 950        error = ParseError.new(
 951            f"{message}. Line {token.line}, Col: {token.col}.\n"
 952            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 953            description=message,
 954            line=token.line,
 955            col=token.col,
 956            start_context=start_context,
 957            highlight=highlight,
 958            end_context=end_context,
 959        )
 960
 961        if self.error_level == ErrorLevel.IMMEDIATE:
 962            raise error
 963
 964        self.errors.append(error)
 965
 966    def expression(
 967        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 968    ) -> E:
 969        """
 970        Creates a new, validated Expression.
 971
 972        Args:
 973            exp_class: the expression class to instantiate.
 974            comments: an optional list of comments to attach to the expression.
 975            kwargs: the arguments to set for the expression along with their respective values.
 976
 977        Returns:
 978            The target expression.
 979        """
 980        instance = exp_class(**kwargs)
 981        instance.add_comments(comments) if comments else self._add_comments(instance)
 982        self.validate_expression(instance)
 983        return instance
 984
 985    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 986        if expression and self._prev_comments:
 987            expression.add_comments(self._prev_comments)
 988            self._prev_comments = None
 989
 990    def validate_expression(
 991        self, expression: exp.Expression, args: t.Optional[t.List] = None
 992    ) -> None:
 993        """
 994        Validates an already instantiated expression, making sure that all its mandatory arguments
 995        are set.
 996
 997        Args:
 998            expression: the expression to validate.
 999            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1000        """
1001        if self.error_level == ErrorLevel.IGNORE:
1002            return
1003
1004        for error_message in expression.error_messages(args):
1005            self.raise_error(error_message)
1006
1007    def _find_sql(self, start: Token, end: Token) -> str:
1008        return self.sql[start.start : end.end + 1]
1009
1010    def _advance(self, times: int = 1) -> None:
1011        self._index += times
1012        self._curr = seq_get(self._tokens, self._index)
1013        self._next = seq_get(self._tokens, self._index + 1)
1014        if self._index > 0:
1015            self._prev = self._tokens[self._index - 1]
1016            self._prev_comments = self._prev.comments
1017        else:
1018            self._prev = None
1019            self._prev_comments = None
1020
1021    def _retreat(self, index: int) -> None:
1022        if index != self._index:
1023            self._advance(index - self._index)
1024
1025    def _parse_command(self) -> exp.Command:
1026        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1027
1028    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1029        start = self._prev
1030        exists = self._parse_exists() if allow_exists else None
1031
1032        self._match(TokenType.ON)
1033
1034        kind = self._match_set(self.CREATABLES) and self._prev
1035
1036        if not kind:
1037            return self._parse_as_command(start)
1038
1039        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1040            this = self._parse_user_defined_function(kind=kind.token_type)
1041        elif kind.token_type == TokenType.TABLE:
1042            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1043        elif kind.token_type == TokenType.COLUMN:
1044            this = self._parse_column()
1045        else:
1046            this = self._parse_id_var()
1047
1048        self._match(TokenType.IS)
1049
1050        return self.expression(
1051            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1052        )
1053
1054    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: the bare expression itself is the TTL entry.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            # A SET following GROUP BY is parsed as the TTL aggregates list.
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1091
1092    def _parse_statement(self) -> t.Optional[exp.Expression]:
1093        if self._curr is None:
1094            return None
1095
1096        if self._match_set(self.STATEMENT_PARSERS):
1097            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1098
1099        if self._match_set(Tokenizer.COMMANDS):
1100            return self._parse_command()
1101
1102        expression = self._parse_expression()
1103        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1104        return self._parse_query_modifiers(expression)
1105
1106    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1107        start = self._prev
1108        temporary = self._match(TokenType.TEMPORARY)
1109        materialized = self._match_text_seq("MATERIALIZED")
1110        kind = self._match_set(self.CREATABLES) and self._prev.text
1111        if not kind:
1112            return self._parse_as_command(start)
1113
1114        return self.expression(
1115            exp.Drop,
1116            exists=self._parse_exists(),
1117            this=self._parse_table(schema=True),
1118            kind=kind,
1119            temporary=temporary,
1120            materialized=materialized,
1121            cascade=self._match_text_seq("CASCADE"),
1122            constraints=self._match_text_seq("CONSTRAINTS"),
1123            purge=self._match_text_seq("PURGE"),
1124        )
1125
1126    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1127        return (
1128            self._match(TokenType.IF)
1129            and (not not_ or self._match(TokenType.NOT))
1130            and self._match(TokenType.EXISTS)
1131        )
1132
    def _parse_create(self) -> t.Optional[exp.Expression]:
        """
        Parses a CREATE [OR REPLACE] [UNIQUE] <kind> ... statement.

        Handles functions/procedures, indexes, and DB-level objects (tables, views,
        etc.), collecting properties from every position they can appear in. Falls
        back to a raw command when the created kind isn't recognized.
        """
        start = self._prev
        # OR REPLACE: either REPLACE was already consumed, or it follows OR here.
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: consume TABLE so FUNCTION becomes the create token.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        # Merges property expressions found at later positions into `properties`.
        def extend_props(temp_props: t.Optional[exp.Expression]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            # Properties may only follow the alias if a query doesn't start here.
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            # CLONE <table> [AT|BEFORE (<kind> => <expr>)]
            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
1250
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parses a property preceded by modifier keywords (NO, DUAL, BEFORE, ...)."""
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Each _match_* call consumes its keyword(s) from the stream, so the order
        # of entries in this dict literal is load-bearing.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser doesn't accept one of the matched modifiers.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1275
1276    def _parse_property(self) -> t.Optional[exp.Expression]:
1277        if self._match_texts(self.PROPERTY_PARSERS):
1278            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1279
1280        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1281            return self._parse_character_set(default=True)
1282
1283        if self._match_text_seq("COMPOUND", "SORTKEY"):
1284            return self._parse_sortkey(compound=True)
1285
1286        if self._match_text_seq("SQL", "SECURITY"):
1287            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1288
1289        assignment = self._match_pair(
1290            TokenType.VAR, TokenType.EQ, advance=False
1291        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1292
1293        if assignment:
1294            key = self._parse_var_or_string()
1295            self._match(TokenType.EQ)
1296            return self.expression(exp.Property, this=key, value=self._parse_column())
1297
1298        return None
1299
1300    def _parse_stored(self) -> exp.Expression:
1301        self._match(TokenType.ALIAS)
1302
1303        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1304        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1305
1306        return self.expression(
1307            exp.FileFormatProperty,
1308            this=self.expression(
1309                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1310            )
1311            if input_format or output_format
1312            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1313        )
1314
1315    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1316        self._match(TokenType.EQ)
1317        self._match(TokenType.ALIAS)
1318        return self.expression(exp_class, this=self._parse_field())
1319
1320    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Expression]:
1321        properties = []
1322
1323        while True:
1324            if before:
1325                prop = self._parse_property_before()
1326            else:
1327                prop = self._parse_property()
1328
1329            if not prop:
1330                break
1331            for p in ensure_list(prop):
1332                properties.append(p)
1333
1334        if properties:
1335            return self.expression(exp.Properties, expressions=properties)
1336
1337        return None
1338
1339    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1340        return self.expression(
1341            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1342        )
1343
1344    def _parse_volatile_property(self) -> exp.Expression:
1345        if self._index >= 2:
1346            pre_volatile_token = self._tokens[self._index - 2]
1347        else:
1348            pre_volatile_token = None
1349
1350        if pre_volatile_token and pre_volatile_token.token_type in (
1351            TokenType.CREATE,
1352            TokenType.REPLACE,
1353            TokenType.UNIQUE,
1354        ):
1355            return exp.VolatileProperty()
1356
1357        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1358
1359    def _parse_with_property(
1360        self,
1361    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1362        self._match(TokenType.WITH)
1363        if self._match(TokenType.L_PAREN, advance=False):
1364            return self._parse_wrapped_csv(self._parse_property)
1365
1366        if self._match_text_seq("JOURNAL"):
1367            return self._parse_withjournaltable()
1368
1369        if self._match_text_seq("DATA"):
1370            return self._parse_withdata(no=False)
1371        elif self._match_text_seq("NO", "DATA"):
1372            return self._parse_withdata(no=True)
1373
1374        if not self._next:
1375            return None
1376
1377        return self._parse_withisolatedloading()
1378
1379    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1380    def _parse_definer(self) -> t.Optional[exp.Expression]:
1381        self._match(TokenType.EQ)
1382
1383        user = self._parse_id_var()
1384        self._match(TokenType.PARAMETER)
1385        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1386
1387        if not user or not host:
1388            return None
1389
1390        return exp.DefinerProperty(this=f"{user}@{host}")
1391
1392    def _parse_withjournaltable(self) -> exp.Expression:
1393        self._match(TokenType.TABLE)
1394        self._match(TokenType.EQ)
1395        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1396
1397    def _parse_log(self, no: bool = False) -> exp.Expression:
1398        return self.expression(exp.LogProperty, no=no)
1399
1400    def _parse_journal(self, **kwargs) -> exp.Expression:
1401        return self.expression(exp.JournalProperty, **kwargs)
1402
1403    def _parse_checksum(self) -> exp.Expression:
1404        self._match(TokenType.EQ)
1405
1406        on = None
1407        if self._match(TokenType.ON):
1408            on = True
1409        elif self._match_text_seq("OFF"):
1410            on = False
1411        default = self._match(TokenType.DEFAULT)
1412
1413        return self.expression(
1414            exp.ChecksumProperty,
1415            on=on,
1416            default=default,
1417        )
1418
1419    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1420        if not self._match_text_seq("BY"):
1421            self._retreat(self._index - 1)
1422            return None
1423        return self.expression(
1424            exp.Cluster,
1425            expressions=self._parse_csv(self._parse_ordered),
1426        )
1427
1428    def _parse_freespace(self) -> exp.Expression:
1429        self._match(TokenType.EQ)
1430        return self.expression(
1431            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1432        )
1433
1434    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1435        if self._match(TokenType.EQ):
1436            return self.expression(
1437                exp.MergeBlockRatioProperty,
1438                this=self._parse_number(),
1439                percent=self._match(TokenType.PERCENT),
1440            )
1441        return self.expression(
1442            exp.MergeBlockRatioProperty,
1443            no=no,
1444            default=default,
1445        )
1446
1447    def _parse_datablocksize(
1448        self,
1449        default: t.Optional[bool] = None,
1450        minimum: t.Optional[bool] = None,
1451        maximum: t.Optional[bool] = None,
1452    ) -> exp.Expression:
1453        self._match(TokenType.EQ)
1454        size = self._parse_number()
1455        units = None
1456        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1457            units = self._prev.text
1458        return self.expression(
1459            exp.DataBlocksizeProperty,
1460            size=size,
1461            units=units,
1462            default=default,
1463            minimum=minimum,
1464            maximum=maximum,
1465        )
1466
1467    def _parse_blockcompression(self) -> exp.Expression:
1468        self._match(TokenType.EQ)
1469        always = self._match_text_seq("ALWAYS")
1470        manual = self._match_text_seq("MANUAL")
1471        never = self._match_text_seq("NEVER")
1472        default = self._match_text_seq("DEFAULT")
1473        autotemp = None
1474        if self._match_text_seq("AUTOTEMP"):
1475            autotemp = self._parse_schema()
1476
1477        return self.expression(
1478            exp.BlockCompressionProperty,
1479            always=always,
1480            manual=manual,
1481            never=never,
1482            default=default,
1483            autotemp=autotemp,
1484        )
1485
1486    def _parse_withisolatedloading(self) -> exp.Expression:
1487        no = self._match_text_seq("NO")
1488        concurrent = self._match_text_seq("CONCURRENT")
1489        self._match_text_seq("ISOLATED", "LOADING")
1490        for_all = self._match_text_seq("FOR", "ALL")
1491        for_insert = self._match_text_seq("FOR", "INSERT")
1492        for_none = self._match_text_seq("FOR", "NONE")
1493        return self.expression(
1494            exp.IsolatedLoadingProperty,
1495            no=no,
1496            concurrent=concurrent,
1497            for_all=for_all,
1498            for_insert=for_insert,
1499            for_none=for_none,
1500        )
1501
    def _parse_locking(self) -> exp.Expression:
        """Parses a LOCKING <kind> [<target>] {FOR | IN} <lock type> [OVERRIDE] property."""
        # Object kind being locked; each branch consumes its keyword on match.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target name; ROW (and no kind) do not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Lock type keywords, probed in order; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1551
1552    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1553        if self._match(TokenType.PARTITION_BY):
1554            return self._parse_csv(self._parse_conjunction)
1555        return []
1556
1557    def _parse_partitioned_by(self) -> exp.Expression:
1558        self._match(TokenType.EQ)
1559        return self.expression(
1560            exp.PartitionedByProperty,
1561            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1562        )
1563
1564    def _parse_withdata(self, no: bool = False) -> exp.Expression:
1565        if self._match_text_seq("AND", "STATISTICS"):
1566            statistics = True
1567        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1568            statistics = False
1569        else:
1570            statistics = None
1571
1572        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1573
1574    def _parse_no_property(self) -> t.Optional[exp.Property]:
1575        if self._match_text_seq("PRIMARY", "INDEX"):
1576            return exp.NoPrimaryIndexProperty()
1577        return None
1578
1579    def _parse_on_property(self) -> t.Optional[exp.Property]:
1580        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1581            return exp.OnCommitProperty()
1582        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1583            return exp.OnCommitProperty(delete=True)
1584        return None
1585
1586    def _parse_distkey(self) -> exp.Expression:
1587        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1588
1589    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1590        table = self._parse_table(schema=True)
1591        options = []
1592        while self._match_texts(("INCLUDING", "EXCLUDING")):
1593            this = self._prev.text.upper()
1594            id_var = self._parse_id_var()
1595
1596            if not id_var:
1597                return None
1598
1599            options.append(
1600                self.expression(
1601                    exp.Property,
1602                    this=this,
1603                    value=exp.Var(this=id_var.this.upper()),
1604                )
1605            )
1606        return self.expression(exp.LikeProperty, this=table, expressions=options)
1607
1608    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1609        return self.expression(
1610            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1611        )
1612
1613    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1614        self._match(TokenType.EQ)
1615        return self.expression(
1616            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1617        )
1618
    def _parse_returns(self) -> exp.Expression:
        """Parse the RETURNS clause of a CREATE FUNCTION statement.

        Supports scalar types, ``RETURNS TABLE (...)`` and the generic
        ``RETURNS TABLE<...>`` form.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> — angle-bracketed column list.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (col type, ...) — parenthesized schema.
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1638
1639    def _parse_describe(self) -> exp.Expression:
1640        kind = self._match_set(self.CREATABLES) and self._prev.text
1641        this = self._parse_table()
1642
1643        return self.expression(exp.Describe, this=this, kind=kind)
1644
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement, including ``INSERT ... DIRECTORY``.

        Note: the keyword arguments to the final ``self.expression`` call are
        evaluated left to right, so EXISTS, PARTITION, the source select,
        ON CONFLICT and RETURNING are consumed from the tokens in that order.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. INSERT OR REPLACE INTO ...; the alternative keyword set is
            # dialect-specific (self.INSERT_ALTERNATIVES).
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1676
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an upsert clause: ``ON CONFLICT ...`` or ``ON DUPLICATE KEY ...``.

        Returns None when neither form follows the current position.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key expressions.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1710
1711    def _parse_returning(self) -> t.Optional[exp.Expression]:
1712        if not self._match(TokenType.RETURNING):
1713            return None
1714
1715        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1716
1717    def _parse_row(self) -> t.Optional[exp.Expression]:
1718        if not self._match(TokenType.FORMAT):
1719            return None
1720        return self._parse_row_format()
1721
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ``[ROW FORMAT] SERDE|DELIMITED ...`` clause.

        Args:
            match_row: when True, require the leading ``ROW FORMAT`` tokens and
                return None (consuming nothing) if they are absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each delimiter option is optional, but they are matched in this fixed order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1747
    def _parse_load(self) -> exp.Expression:
        """Parse ``LOAD DATA [LOCAL] INPATH ...``; other LOAD forms are kept as raw commands."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # Not LOAD DATA: preserve the remaining statement verbatim as a command.
        return self._parse_as_command(self._prev)
1767
    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement; the DELETE token is already consumed.

        The keyword arguments are evaluated left to right, so the target table,
        USING list, WHERE and RETURNING clauses are consumed in that order.
        """
        self._match(TokenType.FROM)  # FROM is optional

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )
1778
    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement; the UPDATE token is already consumed.

        The dict values are evaluated in order, which drives token consumption:
        target table, SET assignments, FROM, WHERE, RETURNING.
        """
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )
1790
1791    def _parse_uncache(self) -> exp.Expression:
1792        if not self._match(TokenType.TABLE):
1793            self.raise_error("Expecting TABLE after UNCACHE")
1794
1795        return self.expression(
1796            exp.Uncache,
1797            exists=self._parse_exists(),
1798            this=self._parse_table(schema=True),
1799        )
1800
    def _parse_cache(self) -> exp.Expression:
        """Parse ``CACHE [LAZY] TABLE <name> [OPTIONS('k' = 'v')] [AS <select>]``."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            # Only a single ('key' = 'value') pair is handled here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
1823
1824    def _parse_partition(self) -> t.Optional[exp.Expression]:
1825        if not self._match(TokenType.PARTITION):
1826            return None
1827
1828        return self.expression(
1829            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1830        )
1831
1832    def _parse_value(self) -> exp.Expression:
1833        if self._match(TokenType.L_PAREN):
1834            expressions = self._parse_csv(self._parse_conjunction)
1835            self._match_r_paren()
1836            return self.expression(exp.Tuple, expressions=expressions)
1837
1838        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1839        # Source: https://prestodb.io/docs/current/sql/values.html
1840        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1841
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH..., SELECT..., a parenthesized subquery, or VALUES.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a bare table reference inside parentheses.
            parse_subquery_alias: parse a trailing alias on parenthesized subqueries.

        Returns:
            The parsed expression (possibly wrapped in set operations), or None.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # (FROM t) — FROM-first syntax.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1929
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause containing one or more CTEs.

        Args:
            skip_with_token: treat the WITH keyword as already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a repeated WITH keyword is
            # also accepted as a separator; the else-branch additionally consumes
            # an optional WITH following a comma.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1949
1950    def _parse_cte(self) -> exp.Expression:
1951        alias = self._parse_table_alias()
1952        if not alias or not alias.this:
1953            self.raise_error("Expected CTE to have alias")
1954
1955        self._match(TokenType.ALIAS)
1956
1957        return self.expression(
1958            exp.CTE,
1959            this=self._parse_wrapped(self._parse_statement),
1960            alias=alias,
1961        )
1962
1963    def _parse_table_alias(
1964        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1965    ) -> t.Optional[exp.Expression]:
1966        any_token = self._match(TokenType.ALIAS)
1967        alias = (
1968            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1969            or self._parse_string_as_identifier()
1970        )
1971
1972        index = self._index
1973        if self._match(TokenType.L_PAREN):
1974            columns = self._parse_csv(self._parse_function_parameter)
1975            self._match_r_paren() if columns else self._retreat(index)
1976        else:
1977            columns = None
1978
1979        if not alias and not columns:
1980            return None
1981
1982        return self.expression(exp.TableAlias, this=alias, columns=columns)
1983
1984    def _parse_subquery(
1985        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1986    ) -> t.Optional[exp.Expression]:
1987        if not this:
1988            return None
1989        return self.expression(
1990            exp.Subquery,
1991            this=this,
1992            pivots=self._parse_pivots(),
1993            alias=self._parse_table_alias() if parse_alias else None,
1994        )
1995
1996    def _parse_query_modifiers(
1997        self, this: t.Optional[exp.Expression]
1998    ) -> t.Optional[exp.Expression]:
1999        if isinstance(this, self.MODIFIABLES):
2000            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2001                expression = parser(self)
2002
2003                if expression:
2004                    this.set(key, expression)
2005        return this
2006
2007    def _parse_hint(self) -> t.Optional[exp.Expression]:
2008        if self._match(TokenType.HINT):
2009            hints = self._parse_csv(self._parse_function)
2010            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2011                self.raise_error("Expected */ after HINT")
2012            return self.expression(exp.Hint, expressions=hints)
2013
2014        return None
2015
2016    def _parse_into(self) -> t.Optional[exp.Expression]:
2017        if not self._match(TokenType.INTO):
2018            return None
2019
2020        temp = self._match(TokenType.TEMPORARY)
2021        unlogged = self._match_text_seq("UNLOGGED")
2022        self._match(TokenType.TABLE)
2023
2024        return self.expression(
2025            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2026        )
2027
2028    def _parse_from(
2029        self, modifiers: bool = False, skip_from_token: bool = False
2030    ) -> t.Optional[exp.From]:
2031        if not skip_from_token and not self._match(TokenType.FROM):
2032            return None
2033
2034        comments = self._prev_comments
2035        this = self._parse_table()
2036
2037        return self.expression(
2038            exp.From,
2039            comments=comments,
2040            this=self._parse_query_modifiers(this) if modifiers else this,
2041        )
2042
2043    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2044        if not self._match(TokenType.MATCH_RECOGNIZE):
2045            return None
2046
2047        self._match_l_paren()
2048
2049        partition = self._parse_partition_by()
2050        order = self._parse_order()
2051        measures = (
2052            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2053        )
2054
2055        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2056            rows = exp.Var(this="ONE ROW PER MATCH")
2057        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2058            text = "ALL ROWS PER MATCH"
2059            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2060                text += f" SHOW EMPTY MATCHES"
2061            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2062                text += f" OMIT EMPTY MATCHES"
2063            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2064                text += f" WITH UNMATCHED ROWS"
2065            rows = exp.Var(this=text)
2066        else:
2067            rows = None
2068
2069        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2070            text = "AFTER MATCH SKIP"
2071            if self._match_text_seq("PAST", "LAST", "ROW"):
2072                text += f" PAST LAST ROW"
2073            elif self._match_text_seq("TO", "NEXT", "ROW"):
2074                text += f" TO NEXT ROW"
2075            elif self._match_text_seq("TO", "FIRST"):
2076                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2077            elif self._match_text_seq("TO", "LAST"):
2078                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2079            after = exp.Var(this=text)
2080        else:
2081            after = None
2082
2083        if self._match_text_seq("PATTERN"):
2084            self._match_l_paren()
2085
2086            if not self._curr:
2087                self.raise_error("Expecting )", self._curr)
2088
2089            paren = 1
2090            start = self._curr
2091
2092            while self._curr and paren > 0:
2093                if self._curr.token_type == TokenType.L_PAREN:
2094                    paren += 1
2095                if self._curr.token_type == TokenType.R_PAREN:
2096                    paren -= 1
2097                end = self._prev
2098                self._advance()
2099            if paren > 0:
2100                self.raise_error("Expecting )", self._curr)
2101            pattern = exp.Var(this=self._find_sql(start, end))
2102        else:
2103            pattern = None
2104
2105        define = (
2106            self._parse_csv(
2107                lambda: self.expression(
2108                    exp.Alias,
2109                    alias=self._parse_id_var(any_token=True),
2110                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2111                )
2112            )
2113            if self._match_text_seq("DEFINE")
2114            else None
2115        )
2116
2117        self._match_r_paren()
2118
2119        return self.expression(
2120            exp.MatchRecognize,
2121            partition_by=partition,
2122            order=order,
2123            measures=measures,
2124            rows=rows,
2125            after=after,
2126            pattern=pattern,
2127            define=define,
2128            alias=self._parse_table_alias(),
2129        )
2130
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY, or return None if absent."""
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is modeled as an outer lateral; CROSS APPLY is not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW <fn> <table> [AS col1, col2, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2172
2173    def _parse_join_parts(
2174        self,
2175    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2176        return (
2177            self._match_set(self.JOIN_METHODS) and self._prev,
2178            self._match_set(self.JOIN_SIDES) and self._prev,
2179            self._match_set(self.JOIN_KINDS) and self._prev,
2180        )
2181
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and CROSS/OUTER APPLY.

        Args:
            skip_join_token: treat the JOIN keyword as already consumed.
        """
        if self._match(TokenType.COMMA):
            # Implicit comma join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Speculatively consumed method/side/kind tokens weren't followed
            # by JOIN — roll the token stream back and discard them.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)
2223
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an index definition.

        Args:
            index: a pre-parsed index name. When given, parse the
                ``ON <table>`` part; otherwise parse the
                ``[UNIQUE] [PRIMARY] [AMP] INDEX <name>`` prefix.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")  # teradata
            if not self._match(TokenType.INDEX):
                return None
            index = self._parse_id_var()
            table = None

        # advance=False: peek for "(" without consuming it so the wrapped
        # csv parser below can match the full parenthesized list itself.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )
2260
2261    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2262        return (
2263            (not schema and self._parse_function())
2264            or self._parse_id_var(any_token=False)
2265            or self._parse_string_as_identifier()
2266            or self._parse_placeholder()
2267        )
2268
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly-qualified table name: ``[catalog.][db.]table[.more]``."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: the previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2291
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or a plain table.

        Args:
            schema: parse as a schema target (e.g. for CREATE/INSERT), skipping
                alias/sample/pivot handling.
            alias_tokens: token types permitted as the table alias.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialects differ on whether the alias comes before or after TABLESAMPLE.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # e.g. table hints of the form WITH (NOLOCK, ...)
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2343
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse ``UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS x]]``."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only dialects the alias names the produced column, not
            # the derived table, so move it into the columns slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Fall back to a default column name when none is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2370
2371    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2372        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2373        if not is_derived and not self._match(TokenType.VALUES):
2374            return None
2375
2376        expressions = self._parse_csv(self._parse_value)
2377
2378        if is_derived:
2379            self._match_r_paren()
2380
2381        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2382
2383    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2384        if not self._match(TokenType.TABLE_SAMPLE) and not (
2385            as_modifier and self._match_text_seq("USING", "SAMPLE")
2386        ):
2387            return None
2388
2389        bucket_numerator = None
2390        bucket_denominator = None
2391        bucket_field = None
2392        percent = None
2393        rows = None
2394        size = None
2395        seed = None
2396
2397        kind = (
2398            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2399        )
2400        method = self._parse_var(tokens=(TokenType.ROW,))
2401
2402        self._match(TokenType.L_PAREN)
2403
2404        num = self._parse_number()
2405
2406        if self._match_text_seq("BUCKET"):
2407            bucket_numerator = self._parse_number()
2408            self._match_text_seq("OUT", "OF")
2409            bucket_denominator = bucket_denominator = self._parse_number()
2410            self._match(TokenType.ON)
2411            bucket_field = self._parse_field()
2412        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2413            percent = num
2414        elif self._match(TokenType.ROWS):
2415            rows = num
2416        else:
2417            size = num
2418
2419        self._match(TokenType.R_PAREN)
2420
2421        if self._match(TokenType.L_PAREN):
2422            method = self._parse_var()
2423            seed = self._match(TokenType.COMMA) and self._parse_number()
2424            self._match_r_paren()
2425        elif self._match_texts(("SEED", "REPEATABLE")):
2426            seed = self._parse_wrapped(self._parse_number)
2427
2428        return self.expression(
2429            exp.TableSample,
2430            method=method,
2431            bucket_numerator=bucket_numerator,
2432            bucket_denominator=bucket_denominator,
2433            bucket_field=bucket_field,
2434            percent=percent,
2435            rows=rows,
2436            size=size,
2437            seed=seed,
2438            kind=kind,
2439        )
2440
2441    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2442        return list(iter(self._parse_pivot, None))
2443
2444    # https://duckdb.org/docs/sql/statements/pivot
2445    def _parse_simplified_pivot(self) -> exp.Pivot:
2446        def _parse_on() -> t.Optional[exp.Expression]:
2447            this = self._parse_bitwise()
2448            return self._parse_in(this) if self._match(TokenType.IN) else this
2449
2450        this = self._parse_table()
2451        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
2452        using = self._match(TokenType.USING) and self._parse_csv(
2453            lambda: self._parse_alias(self._parse_function())
2454        )
2455        group = self._parse_group()
2456        return self.expression(
2457            exp.Pivot, this=this, expressions=expressions, using=using, group=group
2458        )
2459
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT/UNPIVOT clause into an exp.Pivot node.

        Returns None (after rewinding the token stream) when the upcoming tokens
        do not form a parenthesized PIVOT/UNPIVOT body, so the caller can treat
        them as something else.
        """
        index = self._index  # remember position so we can backtrack

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(": not actually a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            # UNPIVOT takes plain columns...
            expressions = self._parse_csv(self._parse_column)
        else:
            # ...whereas PIVOT takes (optionally aliased) function calls.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names: one per (IN value, aggregation
            # name) pair, combined according to the dialect's naming flags.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2519
2520    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2521        return [agg.alias for agg in aggregations]
2522
2523    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2524        if not skip_where_token and not self._match(TokenType.WHERE):
2525            return None
2526
2527        return self.expression(
2528            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2529        )
2530
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS; returns None when no GROUP BY token is present."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulate each kind of grouping element under its own key.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE use a bare keyword form (stored as True),
            # whereas ROLLUP(...) / CUBE(...) carry an explicit column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while modifier clauses are still being consumed.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2567
2568    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2569        if not self._match(TokenType.GROUPING_SETS):
2570            return None
2571
2572        return self._parse_wrapped_csv(self._parse_grouping_set)
2573
2574    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2575        if self._match(TokenType.L_PAREN):
2576            grouping_set = self._parse_csv(self._parse_column)
2577            self._match_r_paren()
2578            return self.expression(exp.Tuple, expressions=grouping_set)
2579
2580        return self._parse_column()
2581
2582    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2583        if not skip_having_token and not self._match(TokenType.HAVING):
2584            return None
2585        return self.expression(exp.Having, this=self._parse_conjunction())
2586
2587    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2588        if not self._match(TokenType.QUALIFY):
2589            return None
2590        return self.expression(exp.Qualify, this=self._parse_conjunction())
2591
2592    def _parse_order(
2593        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2594    ) -> t.Optional[exp.Expression]:
2595        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2596            return this
2597
2598        return self.expression(
2599            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2600        )
2601
2602    def _parse_sort(
2603        self, exp_class: t.Type[exp.Expression], *texts: str
2604    ) -> t.Optional[exp.Expression]:
2605        if not self._match_text_seq(*texts):
2606            return None
2607        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2608
2609    def _parse_ordered(self) -> exp.Expression:
2610        this = self._parse_conjunction()
2611        self._match(TokenType.ASC)
2612        is_desc = self._match(TokenType.DESC)
2613        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2614        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2615        desc = is_desc or False
2616        asc = not desc
2617        nulls_first = is_nulls_first or False
2618        explicitly_null_ordered = is_nulls_first or is_nulls_last
2619        if (
2620            not explicitly_null_ordered
2621            and (
2622                (asc and self.null_ordering == "nulls_are_small")
2623                or (desc and self.null_ordering != "nulls_are_small")
2624            )
2625            and self.null_ordering != "nulls_are_last"
2626        ):
2627            nulls_first = True
2628
2629        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2630
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP when `top=True`) clause, or an ANSI FETCH clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP's count may be parenthesized, e.g. TOP (10).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            # ROW/ROWS is a noise word here; consume it if present.
            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive per the SQL standard.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2669
2670    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2671        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2672            return this
2673
2674        count = self._parse_number()
2675        self._match_set((TokenType.ROW, TokenType.ROWS))
2676        return self.expression(exp.Offset, this=this, expression=count)
2677
2678    def _parse_locks(self) -> t.List[exp.Expression]:
2679        # Lists are invariant, so we need to use a type hint here
2680        locks: t.List[exp.Expression] = []
2681
2682        while True:
2683            if self._match_text_seq("FOR", "UPDATE"):
2684                update = True
2685            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2686                "LOCK", "IN", "SHARE", "MODE"
2687            ):
2688                update = False
2689            else:
2690                break
2691
2692            expressions = None
2693            if self._match_text_seq("OF"):
2694                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2695
2696            wait: t.Optional[bool | exp.Expression] = None
2697            if self._match_text_seq("NOWAIT"):
2698                wait = True
2699            elif self._match_text_seq("WAIT"):
2700                wait = self._parse_primary()
2701            elif self._match_text_seq("SKIP", "LOCKED"):
2702                wait = False
2703
2704            locks.append(
2705                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2706            )
2707
2708        return locks
2709
2710    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2711        if not self._match_set(self.SET_OPERATIONS):
2712            return this
2713
2714        token_type = self._prev.token_type
2715
2716        if token_type == TokenType.UNION:
2717            expression = exp.Union
2718        elif token_type == TokenType.EXCEPT:
2719            expression = exp.Except
2720        else:
2721            expression = exp.Intersect
2722
2723        return self.expression(
2724            expression,
2725            this=this,
2726            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2727            expression=self._parse_set_operations(self._parse_select(nested=True)),
2728        )
2729
2730    def _parse_expression(self) -> t.Optional[exp.Expression]:
2731        return self._parse_alias(self._parse_conjunction())
2732
2733    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2734        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2735
2736    def _parse_equality(self) -> t.Optional[exp.Expression]:
2737        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2738
2739    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2740        return self._parse_tokens(self._parse_range, self.COMPARISON)
2741
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates (LIKE, BETWEEN, IN, IS, ISNULL/NOTNULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)  # e.g. NOT LIKE / NOT BETWEEN / NOT IN

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dialect-specific parser declined; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2768
2769    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2770        index = self._index - 1
2771        negate = self._match(TokenType.NOT)
2772        if self._match_text_seq("DISTINCT", "FROM"):
2773            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2774            return self.expression(klass, this=this, expression=self._parse_expression())
2775
2776        expression = self._parse_null() or self._parse_boolean()
2777        if not expression:
2778            self._retreat(index)
2779            return None
2780
2781        this = self.expression(exp.Is, this=this, expression=expression)
2782        return self.expression(exp.Not, this=this) if negate else this
2783
2784    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
2785        unnest = self._parse_unnest()
2786        if unnest:
2787            this = self.expression(exp.In, this=this, unnest=unnest)
2788        elif self._match(TokenType.L_PAREN):
2789            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2790
2791            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2792                this = self.expression(exp.In, this=this, query=expressions[0])
2793            else:
2794                this = self.expression(exp.In, this=this, expressions=expressions)
2795
2796            self._match_r_paren(this)
2797        else:
2798            this = self.expression(exp.In, this=this, field=self._parse_field())
2799
2800        return this
2801
2802    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2803        low = self._parse_bitwise()
2804        self._match(TokenType.AND)
2805        high = self._parse_bitwise()
2806        return self.expression(exp.Between, this=this, low=low, high=high)
2807
2808    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2809        if not self._match(TokenType.ESCAPE):
2810            return this
2811        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2812
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, canonicalizing it to INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # e.g. INTERVAL '5 day' becomes INTERVAL '5' day.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
2837
2838    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2839        this = self._parse_term()
2840
2841        while True:
2842            if self._match_set(self.BITWISE):
2843                this = self.expression(
2844                    self.BITWISE[self._prev.token_type],
2845                    this=this,
2846                    expression=self._parse_term(),
2847                )
2848            elif self._match_pair(TokenType.LT, TokenType.LT):
2849                this = self.expression(
2850                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2851                )
2852            elif self._match_pair(TokenType.GT, TokenType.GT):
2853                this = self.expression(
2854                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2855                )
2856            else:
2857                break
2858
2859        return this
2860
2861    def _parse_term(self) -> t.Optional[exp.Expression]:
2862        return self._parse_tokens(self._parse_factor, self.TERM)
2863
2864    def _parse_factor(self) -> t.Optional[exp.Expression]:
2865        return self._parse_tokens(self._parse_unary, self.FACTOR)
2866
2867    def _parse_unary(self) -> t.Optional[exp.Expression]:
2868        if self._match_set(self.UNARY_PARSERS):
2869            return self.UNARY_PARSERS[self._prev.token_type](self)
2870        return self._parse_at_time_zone(self._parse_type())
2871
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: an interval, a typed
        literal like DATE '2020-01-01', or a plain column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index  # for backtracking if the "type" turns out to be a column
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # <type> '<literal>' is a cast, e.g. DATE '2020-01-01'.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare, argument-less type name here was likely an identifier.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this
2893
2894    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2895        this = self._parse_type()
2896        if not this:
2897            return None
2898
2899        return self.expression(
2900            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2901        )
2902
    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including parameterized, nested and array forms.

        Args:
            check_func: when True, backtrack if the tokens could instead be a
                function call (a parenthesized type name not followed by a string).
            schema: propagated into nested type parsing.

        Returns:
            A DataType (or Interval/PseudoType) expression, or None after
            rewinding when no type could be parsed here.
        """
        index = self._index  # for backtracking on all the bail-out paths below

        # Teradata allows a SYSUDTLIB. prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False  # tracks whether this could still be a function call

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Empty or unbalanced parens: not a parameterized type after all.
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types, e.g. INT[] or INT[][]; each extra []
            # wraps the type in another ARRAY level.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was a subscript, not an array type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nested types, e.g. ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants onto dedicated types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so this is likely a function call
                # (e.g. varchar(10)) rather than a type; rewind and bail out.
                self._retreat(index)
                return None

            # A string follows (typed literal); leave it for the caller to parse.
            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
3022
3023    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
3024        this = self._parse_type() or self._parse_id_var()
3025        self._match(TokenType.COLON)
3026        return self._parse_column_def(this)
3027
3028    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3029        if not self._match_text_seq("AT", "TIME", "ZONE"):
3030            return this
3031        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3032
3033    def _parse_column(self) -> t.Optional[exp.Expression]:
3034        this = self._parse_field()
3035        if isinstance(this, exp.Identifier):
3036            this = self.expression(exp.Column, this=this)
3037        elif not this:
3038            return self._parse_bracket(this)
3039        return self._parse_column_ops(this)
3040
    def _parse_column_ops(self, this: exp.Expression) -> exp.Expression:
        """Apply trailing column operators (dots, :: casts, brackets) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a literal right-hand side consume the next token.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the column's qualifiers one level up to make room:
                # column -> table, table -> db, db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this
3083
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple. Returns None otherwise."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers like .5 arrive as a DOT token followed by a NUMBER token.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3127
3128    def _parse_field(
3129        self,
3130        any_token: bool = False,
3131        tokens: t.Optional[t.Collection[TokenType]] = None,
3132        anonymous_func: bool = False,
3133    ) -> t.Optional[exp.Expression]:
3134        return (
3135            self._parse_primary()
3136            or self._parse_function(anonymous=anonymous_func)
3137            or self._parse_id_var(any_token=any_token, tokens=tokens)
3138        )
3139
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder mapping used instead of self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous node instead of
                a known function expression.

        Returns:
            The (possibly windowed) function expression, or None when the
            current tokens do not form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows, so only parenless builtins qualify here.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown (or forced-anonymous) function: keep name and args as-is.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)  # the call may be followed by OVER (...)
3193
3194    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3195        return self._parse_column_def(self._parse_id_var())
3196
3197    def _parse_user_defined_function(
3198        self, kind: t.Optional[TokenType] = None
3199    ) -> t.Optional[exp.Expression]:
3200        this = self._parse_id_var()
3201
3202        while self._match(TokenType.DOT):
3203            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3204
3205        if not self._match(TokenType.L_PAREN):
3206            return this
3207
3208        expressions = self._parse_csv(self._parse_function_parameter)
3209        self._match_r_paren()
3210        return self.expression(
3211            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3212        )
3213
3214    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3215        literal = self._parse_primary()
3216        if literal:
3217            return self.expression(exp.Introducer, this=token.text, expression=literal)
3218
3219        return self.expression(exp.Identifier, this=token.text)
3220
3221    def _parse_session_parameter(self) -> exp.Expression:
3222        kind = None
3223        this = self._parse_id_var() or self._parse_primary()
3224
3225        if this and self._match(TokenType.DOT):
3226            kind = this.name
3227            this = self._parse_var() or self._parse_primary()
3228
3229        return self.expression(exp.SessionParameter, this=this, kind=kind)
3230
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr) or, failing that, an ordinary function argument.

        Called for each CSV element of a function call's argument list; when no lambda
        arrow follows the candidate parameter list, the tokens are reparsed as a
        DISTINCT list, a select, or a plain expression.
        """
        # Remember the position so we can rewind if this turns out not to be a lambda.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Unbalanced parens: not a parameter list; rewind and reparse below.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # Dispatch on the arrow token to build the lambda node.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow: rewind and parse the tokens as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Treat the left side of a `name = value` argument as a plain
                    # variable rather than a column reference.
                    left.replace(exp.Var(this=left.text("this")))

        # Arguments may carry trailing IGNORE/RESPECT NULLS, ORDER BY and LIMIT.
        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3262
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        First probes for a nested SELECT; if one parses, `this` is returned unchanged
        with the position rewound so the caller can consume the SELECT itself.
        """
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                # The SELECT probe is speculative: always rewind and discard any
                # errors it produced, whether or not it succeeded.
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        # Each CSV element is either a constraint or a column definition.
        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3285
3286    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3287        # column defs are not really columns, they're identifiers
3288        if isinstance(this, exp.Column):
3289            this = this.this
3290        kind = self._parse_types(schema=True)
3291
3292        if self._match_text_seq("FOR", "ORDINALITY"):
3293            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3294
3295        constraints = []
3296        while True:
3297            constraint = self._parse_column_constraint()
3298            if not constraint:
3299                break
3300            constraints.append(constraint)
3301
3302        if not kind and not constraints:
3303            return this
3304
3305        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3306
3307    def _parse_auto_increment(self) -> exp.Expression:
3308        start = None
3309        increment = None
3310
3311        if self._match(TokenType.L_PAREN, advance=False):
3312            args = self._parse_wrapped_csv(self._parse_bitwise)
3313            start = seq_get(args, 0)
3314            increment = seq_get(args, 1)
3315        elif self._match_text_seq("START"):
3316            start = self._parse_bitwise()
3317            self._match_text_seq("INCREMENT")
3318            increment = self._parse_bitwise()
3319
3320        if start and increment:
3321            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3322
3323        return exp.AutoIncrementColumnConstraint()
3324
3325    def _parse_compress(self) -> exp.Expression:
3326        if self._match(TokenType.L_PAREN, advance=False):
3327            return self.expression(
3328                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3329            )
3330
3331        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3332
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS {IDENTITY [(...)] | (expr)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 2 MINVALUE 0 CYCLE)
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (expr): the parens wrap a generation expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3367
3368    def _parse_inline(self) -> t.Optional[exp.Expression]:
3369        self._match_text_seq("LENGTH")
3370        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3371
3372    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3373        if self._match_text_seq("NULL"):
3374            return self.expression(exp.NotNullColumnConstraint)
3375        if self._match_text_seq("CASESPECIFIC"):
3376            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3377        return None
3378
3379    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3380        if self._match(TokenType.CONSTRAINT):
3381            this = self._parse_id_var()
3382        else:
3383            this = None
3384
3385        if self._match_texts(self.CONSTRAINT_PARSERS):
3386            return self.expression(
3387                exp.ColumnConstraint,
3388                this=this,
3389                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3390            )
3391
3392        return this
3393
3394    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3395        if not self._match(TokenType.CONSTRAINT):
3396            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3397
3398        this = self._parse_id_var()
3399        expressions = []
3400
3401        while True:
3402            constraint = self._parse_unnamed_constraint() or self._parse_function()
3403            if not constraint:
3404                break
3405            expressions.append(constraint)
3406
3407        return self.expression(exp.Constraint, this=this, expressions=expressions)
3408
3409    def _parse_unnamed_constraint(
3410        self, constraints: t.Optional[t.Collection[str]] = None
3411    ) -> t.Optional[exp.Expression]:
3412        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3413            return None
3414
3415        constraint = self._prev.text.upper()
3416        if constraint not in self.CONSTRAINT_PARSERS:
3417            self.raise_error(f"No parser found for schema constraint {constraint}.")
3418
3419        return self.CONSTRAINT_PARSERS[constraint](self)
3420
3421    def _parse_unique(self) -> exp.Expression:
3422        self._match_text_seq("KEY")
3423        return self.expression(
3424            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
3425        )
3426
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings (e.g. "ON DELETE CASCADE")."""
        options = []
        while True:
            if not self._curr:
                # Ran out of tokens; stop collecting options.
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option follows; the option list is done.
                break

        return options
3463
3464    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
3465        if match and not self._match(TokenType.REFERENCES):
3466            return None
3467
3468        expressions = None
3469        this = self._parse_id_var()
3470
3471        if self._match(TokenType.L_PAREN, advance=False):
3472            expressions = self._parse_wrapped_id_vars()
3473
3474        options = self._parse_key_constraint_options()
3475        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3476
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" to the corresponding action string.
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE or RESTRICT.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3502
3503    def _parse_primary_key(
3504        self, wrapped_optional: bool = False, in_props: bool = False
3505    ) -> exp.Expression:
3506        desc = (
3507            self._match_set((TokenType.ASC, TokenType.DESC))
3508            and self._prev.token_type == TokenType.DESC
3509        )
3510
3511        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
3512            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3513
3514        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
3515        options = self._parse_key_constraint_options()
3516        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3517
    @t.overload
    def _parse_bracket(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        ...

    def _parse_bracket(self, this):
        """Parse [...] (subscript/array) or {...} (struct) following `this`, recursively."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:stop]
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # Bare brackets (or ARRAY[...]) build an array literal.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Otherwise it's a subscript; normalize indices by the dialect's offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained brackets, e.g. x[0][1], are handled by recursing on the result.
        return self._parse_bracket(this)
3555
3556    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3557        if self._match(TokenType.COLON):
3558            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3559        return this
3560
3561    def _parse_case(self) -> t.Optional[exp.Expression]:
3562        ifs = []
3563        default = None
3564
3565        expression = self._parse_conjunction()
3566
3567        while self._match(TokenType.WHEN):
3568            this = self._parse_conjunction()
3569            self._match(TokenType.THEN)
3570            then = self._parse_conjunction()
3571            ifs.append(self.expression(exp.If, this=this, true=then))
3572
3573        if self._match(TokenType.ELSE):
3574            default = self._parse_conjunction()
3575
3576        if not self._match(TokenType.END):
3577            self.raise_error("Expected END after CASE", self._prev)
3578
3579        return self._parse_window(
3580            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3581        )
3582
3583    def _parse_if(self) -> t.Optional[exp.Expression]:
3584        if self._match(TokenType.L_PAREN):
3585            args = self._parse_csv(self._parse_conjunction)
3586            this = exp.If.from_arg_list(args)
3587            self.validate_expression(this, args)
3588            self._match_r_paren()
3589        else:
3590            index = self._index - 1
3591            condition = self._parse_conjunction()
3592
3593            if not condition:
3594                self._retreat(index)
3595                return None
3596
3597            self._match(TokenType.THEN)
3598            true = self._parse_conjunction()
3599            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3600            self._match(TokenType.END)
3601            this = self.expression(exp.If, this=condition, true=true, false=false)
3602
3603        return self._parse_window(this)
3604
3605    def _parse_extract(self) -> exp.Expression:
3606        this = self._parse_function() or self._parse_var() or self._parse_type()
3607
3608        if self._match(TokenType.FROM):
3609            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3610
3611        if not self._match(TokenType.COMMA):
3612            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3613
3614        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3615
3616    def _parse_cast(self, strict: bool) -> exp.Expression:
3617        this = self._parse_conjunction()
3618
3619        if not self._match(TokenType.ALIAS):
3620            if self._match(TokenType.COMMA):
3621                return self.expression(
3622                    exp.CastToStrType, this=this, expression=self._parse_string()
3623                )
3624            else:
3625                self.raise_error("Expected AS after CAST")
3626
3627        to = self._parse_types()
3628
3629        if not to:
3630            self.raise_error("Expected TYPE after CAST")
3631        elif to.this == exp.DataType.Type.CHAR:
3632            if self._match(TokenType.CHARACTER_SET):
3633                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3634
3635        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3636
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into an exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: rewind so the caller re-consumes the closing paren.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3665
3666    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3667        to: t.Optional[exp.Expression]
3668        this = self._parse_bitwise()
3669
3670        if self._match(TokenType.USING):
3671            to = self.expression(exp.CharacterSet, this=self._parse_var())
3672        elif self._match(TokenType.COMMA):
3673            to = self._parse_bitwise()
3674        else:
3675            to = None
3676
3677        # Swap the argument order if needed to produce the correct AST
3678        if self.CONVERT_TYPE_FIRST:
3679            this, to = to, this
3680
3681        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3682
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: charset decoding.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired argument is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats a NULL search value as matching NULL, so use IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may be NULL at runtime, so match
                # either by equality or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3729
3730    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3731        self._match_text_seq("KEY")
3732        key = self._parse_field()
3733        self._match(TokenType.COLON)
3734        self._match_text_seq("VALUE")
3735        value = self._parse_field()
3736        if not key and not value:
3737            return None
3738        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3739
3740    def _parse_json_object(self) -> exp.Expression:
3741        expressions = self._parse_csv(self._parse_json_key_value)
3742
3743        null_handling = None
3744        if self._match_text_seq("NULL", "ON", "NULL"):
3745            null_handling = "NULL ON NULL"
3746        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3747            null_handling = "ABSENT ON NULL"
3748
3749        unique_keys = None
3750        if self._match_text_seq("WITH", "UNIQUE"):
3751            unique_keys = True
3752        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3753            unique_keys = False
3754
3755        self._match_text_seq("KEYS")
3756
3757        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3758        format_json = self._match_text_seq("FORMAT", "JSON")
3759        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3760
3761        return self.expression(
3762            exp.JSONObject,
3763            expressions=expressions,
3764            null_handling=null_handling,
3765            unique_keys=unique_keys,
3766            return_type=return_type,
3767            format_json=format_json,
3768            encoding=encoding,
3769        )
3770
3771    def _parse_logarithm(self) -> exp.Expression:
3772        # Default argument order is base, expression
3773        args = self._parse_csv(self._parse_range)
3774
3775        if len(args) > 1:
3776            if not self.LOG_BASE_FIRST:
3777                args.reverse()
3778            return exp.Log.from_arg_list(args)
3779
3780        return self.expression(
3781            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3782        )
3783
3784    def _parse_match_against(self) -> exp.Expression:
3785        expressions = self._parse_csv(self._parse_column)
3786
3787        self._match_text_seq(")", "AGAINST", "(")
3788
3789        this = self._parse_string()
3790
3791        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3792            modifier = "IN NATURAL LANGUAGE MODE"
3793            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3794                modifier = f"{modifier} WITH QUERY EXPANSION"
3795        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3796            modifier = "IN BOOLEAN MODE"
3797        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3798            modifier = "WITH QUERY EXPANSION"
3799        else:
3800            modifier = None
3801
3802        return self.expression(
3803            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3804        )
3805
3806    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3807    def _parse_open_json(self) -> exp.Expression:
3808        this = self._parse_bitwise()
3809        path = self._match(TokenType.COMMA) and self._parse_string()
3810
3811        def _parse_open_json_column_def() -> exp.Expression:
3812            this = self._parse_field(any_token=True)
3813            kind = self._parse_types()
3814            path = self._parse_string()
3815            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3816            return self.expression(
3817                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3818            )
3819
3820        expressions = None
3821        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3822            self._match_l_paren()
3823            expressions = self._parse_csv(_parse_open_json_column_def)
3824
3825        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3826
3827    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3828        args = self._parse_csv(self._parse_bitwise)
3829
3830        if self._match(TokenType.IN):
3831            return self.expression(
3832                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3833            )
3834
3835        if haystack_first:
3836            haystack = seq_get(args, 0)
3837            needle = seq_get(args, 1)
3838        else:
3839            needle = seq_get(args, 0)
3840            haystack = seq_get(args, 1)
3841
3842        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3843
3844        self.validate_expression(this, args)
3845
3846        return this
3847
3848    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3849        args = self._parse_csv(self._parse_table)
3850        return exp.JoinHint(this=func_name.upper(), expressions=args)
3851
3852    def _parse_substring(self) -> exp.Expression:
3853        # Postgres supports the form: substring(string [from int] [for int])
3854        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3855
3856        args = self._parse_csv(self._parse_bitwise)
3857
3858        if self._match(TokenType.FROM):
3859            args.append(self._parse_bitwise())
3860            if self._match(TokenType.FOR):
3861                args.append(self._parse_bitwise())
3862
3863        this = exp.Substring.from_arg_list(args)
3864        self.validate_expression(this, args)
3865
3866        return this
3867
3868    def _parse_trim(self) -> exp.Expression:
3869        # https://www.w3resource.com/sql/character-functions/trim.php
3870        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3871
3872        position = None
3873        collation = None
3874
3875        if self._match_texts(self.TRIM_TYPES):
3876            position = self._prev.text.upper()
3877
3878        expression = self._parse_bitwise()
3879        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3880            this = self._parse_bitwise()
3881        else:
3882            this = expression
3883            expression = None
3884
3885        if self._match(TokenType.COLLATE):
3886            collation = self._parse_bitwise()
3887
3888        return self.expression(
3889            exp.Trim,
3890            this=this,
3891            position=position,
3892            expression=expression,
3893            collation=collation,
3894        )
3895
3896    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3897        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3898
3899    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3900        return self._parse_window(self._parse_id_var(), alias=True)
3901
3902    def _parse_respect_or_ignore_nulls(
3903        self, this: t.Optional[exp.Expression]
3904    ) -> t.Optional[exp.Expression]:
3905        if self._match_text_seq("IGNORE", "NULLS"):
3906            return self.expression(exp.IgnoreNulls, this=this)
3907        if self._match_text_seq("RESPECT", "NULLS"):
3908            return self.expression(exp.RespectNulls, this=this)
3909        return this
3910
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window suffixes after `this`: FILTER, WITHIN GROUP and OVER (...).

        Args:
            this: the expression (typically a function call) being windowed.
            alias: when True, parse a named window definition (`name AS (...)`).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name: a reference to a named window, no inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] start [AND end]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3993
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary of a window frame clause.

        Returns a dict with the boundary "value" ("UNBOUNDED", "CURRENT ROW",
        or an arbitrary parsed expression) and its "side" (the matched text of
        a token in WINDOW_SIDES, or False when no side keyword follows).
        """
        self._match(TokenType.BETWEEN)

        return {
            # Short-circuit chain: _match_text_seq only consumes tokens on a
            # full match, so at most one alternative advances the parser.
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
4005
4006    def _parse_alias(
4007        self, this: t.Optional[exp.Expression], explicit: bool = False
4008    ) -> t.Optional[exp.Expression]:
4009        any_token = self._match(TokenType.ALIAS)
4010
4011        if explicit and not any_token:
4012            return this
4013
4014        if self._match(TokenType.L_PAREN):
4015            aliases = self.expression(
4016                exp.Aliases,
4017                this=this,
4018                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
4019            )
4020            self._match_r_paren(aliases)
4021            return aliases
4022
4023        alias = self._parse_id_var(any_token)
4024
4025        if alias:
4026            return self.expression(exp.Alias, this=this, alias=alias)
4027
4028        return this
4029
4030    def _parse_id_var(
4031        self,
4032        any_token: bool = True,
4033        tokens: t.Optional[t.Collection[TokenType]] = None,
4034    ) -> t.Optional[exp.Expression]:
4035        identifier = self._parse_identifier()
4036
4037        if identifier:
4038            return identifier
4039
4040        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
4041            quoted = self._prev.token_type == TokenType.STRING
4042            return exp.Identifier(this=self._prev.text, quoted=quoted)
4043
4044        return None
4045
4046    def _parse_string(self) -> t.Optional[exp.Expression]:
4047        if self._match(TokenType.STRING):
4048            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4049        return self._parse_placeholder()
4050
4051    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4052        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4053
4054    def _parse_number(self) -> t.Optional[exp.Expression]:
4055        if self._match(TokenType.NUMBER):
4056            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4057        return self._parse_placeholder()
4058
4059    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4060        if self._match(TokenType.IDENTIFIER):
4061            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4062        return self._parse_placeholder()
4063
4064    def _parse_var(
4065        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4066    ) -> t.Optional[exp.Expression]:
4067        if (
4068            (any_token and self._advance_any())
4069            or self._match(TokenType.VAR)
4070            or (self._match_set(tokens) if tokens else False)
4071        ):
4072            return self.expression(exp.Var, this=self._prev.text)
4073        return self._parse_placeholder()
4074
4075    def _advance_any(self) -> t.Optional[Token]:
4076        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4077            self._advance()
4078            return self._prev
4079        return None
4080
4081    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4082        return self._parse_var() or self._parse_string()
4083
4084    def _parse_null(self) -> t.Optional[exp.Expression]:
4085        if self._match(TokenType.NULL):
4086            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4087        return None
4088
4089    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4090        if self._match(TokenType.TRUE):
4091            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4092        if self._match(TokenType.FALSE):
4093            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4094        return None
4095
4096    def _parse_star(self) -> t.Optional[exp.Expression]:
4097        if self._match(TokenType.STAR):
4098            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4099        return None
4100
4101    def _parse_parameter(self) -> exp.Expression:
4102        wrapped = self._match(TokenType.L_BRACE)
4103        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4104        self._match(TokenType.R_BRACE)
4105        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4106
4107    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4108        if self._match_set(self.PLACEHOLDER_PARSERS):
4109            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4110            if placeholder:
4111                return placeholder
4112            self._advance(-1)
4113        return None
4114
4115    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4116        if not self._match(TokenType.EXCEPT):
4117            return None
4118        if self._match(TokenType.L_PAREN, advance=False):
4119            return self._parse_wrapped_csv(self._parse_column)
4120        return self._parse_csv(self._parse_column)
4121
4122    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4123        if not self._match(TokenType.REPLACE):
4124            return None
4125        if self._match(TokenType.L_PAREN, advance=False):
4126            return self._parse_wrapped_csv(self._parse_expression)
4127        return self._parse_csv(self._parse_expression)
4128
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`.

        Items that parse to None are dropped; comments attached to a
        separator token are added to the item parsed just before it.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # The separator's comments belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
4142
    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators into a nested expression tree.

        `expressions` maps operator token types to expression classes; each
        match combines the accumulated tree (left side) with the next parsed
        operand, preserving any comments on the operator token.
        """
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
4157
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4160
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list wrapped in parentheses.

        When `optional` is True the surrounding parentheses may be omitted.
        """
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
4167
4168    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4169        wrapped = self._match(TokenType.L_PAREN)
4170        if not wrapped and not optional:
4171            self.raise_error("Expecting (")
4172        parse_result = parse_method()
4173        if wrapped:
4174            self._match_r_paren()
4175        return parse_result
4176
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a SELECT or, failing that, an expression with set operations."""
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )
4181
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT used inside DDL, including set operations and query modifiers."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )
4186
    def _parse_transaction(self) -> exp.Expression:
        """Parse a BEGIN/START [TRANSACTION|WORK] statement with optional modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several VAR tokens (joined with spaces).
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
4206
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse the remainder of a COMMIT or ROLLBACK statement.

        The COMMIT/ROLLBACK keyword was already consumed by the caller and is
        inspected via self._prev. Note that the savepoint is only emitted for
        ROLLBACK and the chain flag only for COMMIT; the other parsed value
        is discarded.
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # AND [NO] CHAIN
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
4225
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] <column definition>.

        Returns None when the ADD keyword is absent. Supports IF NOT EXISTS
        and a trailing FIRST/AFTER column-position clause.
        """
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4246
4247    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4248        drop = self._match(TokenType.DROP) and self._parse_drop()
4249        if drop and not isinstance(drop, exp.Command):
4250            drop.set("kind", drop.args.get("kind", "COLUMN"))
4251        return drop
4252
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse ALTER TABLE ... DROP PARTITION (one or more partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
4258
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The triggering token (CONSTRAINT, FOREIGN KEY or PRIMARY KEY) has
        already been consumed by the caller and is inspected via self._prev.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            # CONSTRAINT <name> CHECK (<condition>) [ENFORCED]
            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # A named CONSTRAINT may itself introduce a FOREIGN/PRIMARY KEY, so
        # both the already-consumed kind and the next token are checked.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4282
4283    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4284        index = self._index - 1
4285
4286        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4287            return self._parse_csv(self._parse_add_constraint)
4288
4289        self._retreat(index)
4290        return self._parse_csv(self._parse_add_column)
4291
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] for a single column.

        Handles DROP DEFAULT, SET DEFAULT, and [SET DATA] TYPE changes with
        optional COLLATE and USING clauses.
        """
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # ALTER COLUMN ... [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        # NOTE: the keyword-argument matches below run in order and consume tokens.
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4309
4310    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4311        index = self._index - 1
4312
4313        partition_exists = self._parse_exists()
4314        if self._match(TokenType.PARTITION, advance=False):
4315            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4316
4317        self._retreat(index)
4318        return self._parse_csv(self._parse_drop_column)
4319
4320    def _parse_alter_table_rename(self) -> exp.Expression:
4321        self._match_text_seq("TO")
4322        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4323
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; unsupported forms fall back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node when every token was consumed;
            # otherwise preserve the whole statement verbatim as a Command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4348
    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> with WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # BY TARGET -> source=False, BY SOURCE -> source=True,
            # neither -> source=False (the second _match_text_seq fails).
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (cols) VALUES (vals).
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4414
4415    def _parse_show(self) -> t.Optional[exp.Expression]:
4416        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4417        if parser:
4418            return parser(self)
4419        self._advance()
4420        return self.expression(exp.Show, this=self._prev.text.upper())
4421
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse `SET <name> = <value>` (or `<name> TO <value>`) into a SetItem.

        Returns None (after rewinding) when no '=' or TO follows the name.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment — rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4448
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the kwargs expansion.
            **{"global": global_},  # type: ignore
        )
4460
4461    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4462        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4463        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4464
    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; fall back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Leftover tokens mean the statement was not fully understood.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
4474
4475    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4476        for option in options:
4477            if self._match_text_seq(*option.split(" ")):
4478                return exp.Var(this=option)
4479        return None
4480
4481    def _parse_as_command(self, start: Token) -> exp.Command:
4482        while self._curr:
4483            self._advance()
4484        text = self._find_sql(start, self._prev)
4485        size = len(start.text)
4486        return exp.Command(this=text[:size], expression=text[size:])
4487
    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property of the form NAME(kind(key value ...)).

        `this` is the already-consumed property name; `kind` and the optional
        inner key/value settings are parsed here.
        """
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop once neither a key nor a value can be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
4512
4513    def _parse_dict_range(self, this: str) -> exp.DictRange:
4514        self._match_l_paren()
4515        has_min = self._match_text_seq("MIN")
4516        if has_min:
4517            min = self._parse_var() or self._parse_primary()
4518            self._match_text_seq("MAX")
4519            max = self._parse_var() or self._parse_primary()
4520        else:
4521            max = self._parse_var() or self._parse_primary()
4522            min = exp.Literal.number(0)
4523        self._match_r_paren()
4524        return self.expression(exp.DictRange, this=this, min=min, max=max)
4525
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a multi-word parser key.

        Consumes the matched tokens on success and returns the registered
        parser; rewinds the token stream and returns None on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: the accumulated words match no registered key.
                break
            if result == 2:
                # Exact match for a complete key.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4548
4549    def _match(self, token_type, advance=True, expression=None):
4550        if not self._curr:
4551            return None
4552
4553        if self._curr.token_type == token_type:
4554            if advance:
4555                self._advance()
4556            self._add_comments(expression)
4557            return True
4558
4559        return None
4560
4561    def _match_set(self, types, advance=True):
4562        if not self._curr:
4563            return None
4564
4565        if self._curr.token_type in types:
4566            if advance:
4567                self._advance()
4568            return True
4569
4570        return None
4571
4572    def _match_pair(self, token_type_a, token_type_b, advance=True):
4573        if not self._curr or not self._next:
4574            return None
4575
4576        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4577            if advance:
4578                self._advance(2)
4579            return True
4580
4581        return None
4582
4583    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4584        if not self._match(TokenType.L_PAREN, expression=expression):
4585            self.raise_error("Expecting (")
4586
4587    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
4588        if not self._match(TokenType.R_PAREN, expression=expression):
4589            self.raise_error("Expecting )")
4590
4591    def _match_texts(self, texts, advance=True):
4592        if self._curr and self._curr.text.upper() in texts:
4593            if advance:
4594                self._advance()
4595            return True
4596        return False
4597
    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of keyword texts, case-insensitively.

        On any mismatch the token stream is rewound to where it started and
        False is returned. With advance=False the stream is rewound even on
        success, making this a pure lookahead.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4611
    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var nodes.

        Table-qualified columns become Dot(table, column); bare columns and
        identifiers become plain Var nodes. Other node types are returned
        unchanged (after their children are rewritten, for Dot nodes).
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)

        return this
4637
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Substitute lambda parameters referenced as columns within `node`.

        Columns whose first part names a lambda variable are replaced by the
        bare identifier (or a Dot chain when the column is table-qualified).
        Returns the (possibly replaced) root node.
        """
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot (loop never ran): replace the column
                    # itself, or the whole node when the column is the root.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
810    def __init__(
811        self,
812        error_level: t.Optional[ErrorLevel] = None,
813        error_message_context: int = 100,
814        index_offset: int = 0,
815        unnest_column_only: bool = False,
816        alias_post_tablesample: bool = False,
817        max_errors: int = 3,
818        null_ordering: t.Optional[str] = None,
819    ):
820        self.error_level = error_level or ErrorLevel.IMMEDIATE
821        self.error_message_context = error_message_context
822        self.index_offset = index_offset
823        self.unnest_column_only = unnest_column_only
824        self.alias_post_tablesample = alias_post_tablesample
825        self.max_errors = max_errors
826        self.null_ordering = null_ordering
827        self.reset()
def reset(self):
829    def reset(self):
830        self.sql = ""
831        self.errors = []
832        self._tokens = []
833        self._index = 0
834        self._curr = None
835        self._next = None
836        self._prev = None
837        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
839    def parse(
840        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
841    ) -> t.List[t.Optional[exp.Expression]]:
842        """
843        Parses a list of tokens and returns a list of syntax trees, one tree
844        per parsed SQL statement.
845
846        Args:
847            raw_tokens: the list of tokens.
848            sql: the original SQL string, used to produce helpful debug messages.
849
850        Returns:
851            The list of syntax trees.
852        """
853        return self._parse(
854            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
855        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
857    def parse_into(
858        self,
859        expression_types: exp.IntoType,
860        raw_tokens: t.List[Token],
861        sql: t.Optional[str] = None,
862    ) -> t.List[t.Optional[exp.Expression]]:
863        """
864        Parses a list of tokens into a given Expression type. If a collection of Expression
865        types is given instead, this method will try to parse the token list into each one
866        of them, stopping at the first for which the parsing succeeds.
867
868        Args:
869            expression_types: the expression type(s) to try and parse the token list into.
870            raw_tokens: the list of tokens.
871            sql: the original SQL string, used to produce helpful debug messages.
872
873        Returns:
874            The target Expression.
875        """
876        errors = []
877        for expression_type in ensure_collection(expression_types):
878            parser = self.EXPRESSION_PARSERS.get(expression_type)
879            if not parser:
880                raise TypeError(f"No parser registered for {expression_type}")
881            try:
882                return self._parse(parser, raw_tokens, sql)
883            except ParseError as e:
884                e.errors[0]["into_expression"] = expression_type
885                errors.append(e)
886        raise ParseError(
887            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
888            errors=merge_errors(errors),
889        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
925    def check_errors(self) -> None:
926        """
927        Logs or raises any found errors, depending on the chosen error level setting.
928        """
929        if self.error_level == ErrorLevel.WARN:
930            for error in self.errors:
931                logger.error(str(error))
932        elif self.error_level == ErrorLevel.RAISE and self.errors:
933            raise ParseError(
934                concat_messages(self.errors, self.max_errors),
935                errors=merge_errors(self.errors),
936            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
938    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
939        """
940        Appends an error in the list of recorded errors or raises it, depending on the chosen
941        error level setting.
942        """
943        token = token or self._curr or self._prev or Token.string("")
944        start = token.start
945        end = token.end + 1
946        start_context = self.sql[max(start - self.error_message_context, 0) : start]
947        highlight = self.sql[start:end]
948        end_context = self.sql[end : end + self.error_message_context]
949
950        error = ParseError.new(
951            f"{message}. Line {token.line}, Col: {token.col}.\n"
952            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
953            description=message,
954            line=token.line,
955            col=token.col,
956            start_context=start_context,
957            highlight=highlight,
958            end_context=end_context,
959        )
960
961        if self.error_level == ErrorLevel.IMMEDIATE:
962            raise error
963
964        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
966    def expression(
967        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
968    ) -> E:
969        """
970        Creates a new, validated Expression.
971
972        Args:
973            exp_class: the expression class to instantiate.
974            comments: an optional list of comments to attach to the expression.
975            kwargs: the arguments to set for the expression along with their respective values.
976
977        Returns:
978            The target expression.
979        """
980        instance = exp_class(**kwargs)
981        instance.add_comments(comments) if comments else self._add_comments(instance)
982        self.validate_expression(instance)
983        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:
  The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
 990    def validate_expression(
 991        self, expression: exp.Expression, args: t.Optional[t.List] = None
 992    ) -> None:
 993        """
 994        Validates an already instantiated expression, making sure that all its mandatory arguments
 995        are set.
 996
 997        Args:
 998            expression: the expression to validate.
 999            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1000        """
1001        if self.error_level == ErrorLevel.IGNORE:
1002            return
1003
1004        for error_message in expression.error_messages(args):
1005            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.