Edit on GitHub

sqlglot.parser — source listing of the sqlglot parser module

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
  13logger = logging.getLogger("sqlglot")
  14
  15E = t.TypeVar("E", bound=exp.Expression)
  16
  17
  18def parse_var_map(args: t.Sequence) -> exp.Expression:
  19    if len(args) == 1 and args[0].is_star:
  20        return exp.StarMap(this=args[0])
  21
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33def parse_like(args):
  34    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  35    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  36
  37
  38def binary_range_parser(
  39    expr_type: t.Type[exp.Expression],
  40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  41    return lambda self, this: self._parse_escape(
  42        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  43    )
  44
  45
  46class _Parser(type):
  47    def __new__(cls, clsname, bases, attrs):
  48        klass = super().__new__(cls, clsname, bases, attrs)
  49        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  50        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  51
  52        return klass
  53
  54
  55class Parser(metaclass=_Parser):
  56    """
  57    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  58    a parsed syntax tree.
  59
  60    Args:
  61        error_level: the desired error level.
  62            Default: ErrorLevel.RAISE
  63        error_message_context: determines the amount of context to capture from a
  64            query string when displaying the error message (in number of characters).
  65            Default: 50.
  66        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  67            Default: 0
  68        alias_post_tablesample: If the table alias comes after tablesample.
  69            Default: False
  70        max_errors: Maximum number of error messages to include in a raised ParseError.
  71            This is only relevant if error_level is ErrorLevel.RAISE.
  72            Default: 3
  73        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  74            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  75            Default: "nulls_are_small"
  76    """
  77
  78    FUNCTIONS: t.Dict[str, t.Callable] = {
  79        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  80        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  81            this=seq_get(args, 0),
  82            to=exp.DataType(this=exp.DataType.Type.TEXT),
  83        ),
  84        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  85        "IFNULL": exp.Coalesce.from_arg_list,
  86        "LIKE": parse_like,
  87        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  88            this=seq_get(args, 0),
  89            to=exp.DataType(this=exp.DataType.Type.TEXT),
  90        ),
  91        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  92            this=exp.Cast(
  93                this=seq_get(args, 0),
  94                to=exp.DataType(this=exp.DataType.Type.TEXT),
  95            ),
  96            start=exp.Literal.number(1),
  97            length=exp.Literal.number(10),
  98        ),
  99        "VAR_MAP": parse_var_map,
 100    }
 101
 102    NO_PAREN_FUNCTIONS = {
 103        TokenType.CURRENT_DATE: exp.CurrentDate,
 104        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 105        TokenType.CURRENT_TIME: exp.CurrentTime,
 106        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 107        TokenType.CURRENT_USER: exp.CurrentUser,
 108    }
 109
 110    JOIN_HINTS: t.Set[str] = set()
 111
 112    NESTED_TYPE_TOKENS = {
 113        TokenType.ARRAY,
 114        TokenType.MAP,
 115        TokenType.NULLABLE,
 116        TokenType.STRUCT,
 117    }
 118
 119    TYPE_TOKENS = {
 120        TokenType.BIT,
 121        TokenType.BOOLEAN,
 122        TokenType.TINYINT,
 123        TokenType.UTINYINT,
 124        TokenType.SMALLINT,
 125        TokenType.USMALLINT,
 126        TokenType.INT,
 127        TokenType.UINT,
 128        TokenType.BIGINT,
 129        TokenType.UBIGINT,
 130        TokenType.INT128,
 131        TokenType.UINT128,
 132        TokenType.INT256,
 133        TokenType.UINT256,
 134        TokenType.FLOAT,
 135        TokenType.DOUBLE,
 136        TokenType.CHAR,
 137        TokenType.NCHAR,
 138        TokenType.VARCHAR,
 139        TokenType.NVARCHAR,
 140        TokenType.TEXT,
 141        TokenType.MEDIUMTEXT,
 142        TokenType.LONGTEXT,
 143        TokenType.MEDIUMBLOB,
 144        TokenType.LONGBLOB,
 145        TokenType.BINARY,
 146        TokenType.VARBINARY,
 147        TokenType.JSON,
 148        TokenType.JSONB,
 149        TokenType.INTERVAL,
 150        TokenType.TIME,
 151        TokenType.TIMESTAMP,
 152        TokenType.TIMESTAMPTZ,
 153        TokenType.TIMESTAMPLTZ,
 154        TokenType.DATETIME,
 155        TokenType.DATETIME64,
 156        TokenType.DATE,
 157        TokenType.DECIMAL,
 158        TokenType.BIGDECIMAL,
 159        TokenType.UUID,
 160        TokenType.GEOGRAPHY,
 161        TokenType.GEOMETRY,
 162        TokenType.HLLSKETCH,
 163        TokenType.HSTORE,
 164        TokenType.PSEUDO_TYPE,
 165        TokenType.SUPER,
 166        TokenType.SERIAL,
 167        TokenType.SMALLSERIAL,
 168        TokenType.BIGSERIAL,
 169        TokenType.XML,
 170        TokenType.UNIQUEIDENTIFIER,
 171        TokenType.MONEY,
 172        TokenType.SMALLMONEY,
 173        TokenType.ROWVERSION,
 174        TokenType.IMAGE,
 175        TokenType.VARIANT,
 176        TokenType.OBJECT,
 177        TokenType.INET,
 178        *NESTED_TYPE_TOKENS,
 179    }
 180
 181    SUBQUERY_PREDICATES = {
 182        TokenType.ANY: exp.Any,
 183        TokenType.ALL: exp.All,
 184        TokenType.EXISTS: exp.Exists,
 185        TokenType.SOME: exp.Any,
 186    }
 187
 188    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 189
 190    DB_CREATABLES = {
 191        TokenType.DATABASE,
 192        TokenType.SCHEMA,
 193        TokenType.TABLE,
 194        TokenType.VIEW,
 195    }
 196
 197    CREATABLES = {
 198        TokenType.COLUMN,
 199        TokenType.FUNCTION,
 200        TokenType.INDEX,
 201        TokenType.PROCEDURE,
 202        *DB_CREATABLES,
 203    }
 204
 205    ID_VAR_TOKENS = {
 206        TokenType.VAR,
 207        TokenType.ANTI,
 208        TokenType.APPLY,
 209        TokenType.ASC,
 210        TokenType.AUTO_INCREMENT,
 211        TokenType.BEGIN,
 212        TokenType.BOTH,
 213        TokenType.BUCKET,
 214        TokenType.CACHE,
 215        TokenType.CASCADE,
 216        TokenType.COLLATE,
 217        TokenType.COMMAND,
 218        TokenType.COMMENT,
 219        TokenType.COMMIT,
 220        TokenType.COMPOUND,
 221        TokenType.CONSTRAINT,
 222        TokenType.DEFAULT,
 223        TokenType.DELETE,
 224        TokenType.DESC,
 225        TokenType.DESCRIBE,
 226        TokenType.DIV,
 227        TokenType.END,
 228        TokenType.EXECUTE,
 229        TokenType.ESCAPE,
 230        TokenType.FALSE,
 231        TokenType.FIRST,
 232        TokenType.FILTER,
 233        TokenType.FOLLOWING,
 234        TokenType.FORMAT,
 235        TokenType.FULL,
 236        TokenType.IF,
 237        TokenType.IS,
 238        TokenType.ISNULL,
 239        TokenType.INTERVAL,
 240        TokenType.KEEP,
 241        TokenType.LAZY,
 242        TokenType.LEADING,
 243        TokenType.LEFT,
 244        TokenType.LOCAL,
 245        TokenType.MATERIALIZED,
 246        TokenType.MERGE,
 247        TokenType.NATURAL,
 248        TokenType.NEXT,
 249        TokenType.OFFSET,
 250        TokenType.ONLY,
 251        TokenType.OPTIONS,
 252        TokenType.ORDINALITY,
 253        TokenType.OVERWRITE,
 254        TokenType.PARTITION,
 255        TokenType.PERCENT,
 256        TokenType.PIVOT,
 257        TokenType.PRAGMA,
 258        TokenType.PRECEDING,
 259        TokenType.RANGE,
 260        TokenType.REFERENCES,
 261        TokenType.RIGHT,
 262        TokenType.ROW,
 263        TokenType.ROWS,
 264        TokenType.SEED,
 265        TokenType.SEMI,
 266        TokenType.SET,
 267        TokenType.SETTINGS,
 268        TokenType.SHOW,
 269        TokenType.SORTKEY,
 270        TokenType.TEMPORARY,
 271        TokenType.TOP,
 272        TokenType.TRAILING,
 273        TokenType.TRUE,
 274        TokenType.UNBOUNDED,
 275        TokenType.UNIQUE,
 276        TokenType.UNLOGGED,
 277        TokenType.UNPIVOT,
 278        TokenType.VOLATILE,
 279        TokenType.WINDOW,
 280        *CREATABLES,
 281        *SUBQUERY_PREDICATES,
 282        *TYPE_TOKENS,
 283        *NO_PAREN_FUNCTIONS,
 284    }
 285
 286    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 287
 288    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 289        TokenType.APPLY,
 290        TokenType.FULL,
 291        TokenType.LEFT,
 292        TokenType.LOCK,
 293        TokenType.NATURAL,
 294        TokenType.OFFSET,
 295        TokenType.RIGHT,
 296        TokenType.WINDOW,
 297    }
 298
 299    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 300
 301    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 302
 303    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 304
 305    FUNC_TOKENS = {
 306        TokenType.COMMAND,
 307        TokenType.CURRENT_DATE,
 308        TokenType.CURRENT_DATETIME,
 309        TokenType.CURRENT_TIMESTAMP,
 310        TokenType.CURRENT_TIME,
 311        TokenType.CURRENT_USER,
 312        TokenType.FILTER,
 313        TokenType.FIRST,
 314        TokenType.FORMAT,
 315        TokenType.GLOB,
 316        TokenType.IDENTIFIER,
 317        TokenType.INDEX,
 318        TokenType.ISNULL,
 319        TokenType.ILIKE,
 320        TokenType.LIKE,
 321        TokenType.MERGE,
 322        TokenType.OFFSET,
 323        TokenType.PRIMARY_KEY,
 324        TokenType.RANGE,
 325        TokenType.REPLACE,
 326        TokenType.ROW,
 327        TokenType.UNNEST,
 328        TokenType.VAR,
 329        TokenType.LEFT,
 330        TokenType.RIGHT,
 331        TokenType.DATE,
 332        TokenType.DATETIME,
 333        TokenType.TABLE,
 334        TokenType.TIMESTAMP,
 335        TokenType.TIMESTAMPTZ,
 336        TokenType.WINDOW,
 337        *TYPE_TOKENS,
 338        *SUBQUERY_PREDICATES,
 339    }
 340
 341    CONJUNCTION = {
 342        TokenType.AND: exp.And,
 343        TokenType.OR: exp.Or,
 344    }
 345
 346    EQUALITY = {
 347        TokenType.EQ: exp.EQ,
 348        TokenType.NEQ: exp.NEQ,
 349        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 350    }
 351
 352    COMPARISON = {
 353        TokenType.GT: exp.GT,
 354        TokenType.GTE: exp.GTE,
 355        TokenType.LT: exp.LT,
 356        TokenType.LTE: exp.LTE,
 357    }
 358
 359    BITWISE = {
 360        TokenType.AMP: exp.BitwiseAnd,
 361        TokenType.CARET: exp.BitwiseXor,
 362        TokenType.PIPE: exp.BitwiseOr,
 363        TokenType.DPIPE: exp.DPipe,
 364    }
 365
 366    TERM = {
 367        TokenType.DASH: exp.Sub,
 368        TokenType.PLUS: exp.Add,
 369        TokenType.MOD: exp.Mod,
 370        TokenType.COLLATE: exp.Collate,
 371    }
 372
 373    FACTOR = {
 374        TokenType.DIV: exp.IntDiv,
 375        TokenType.LR_ARROW: exp.Distance,
 376        TokenType.SLASH: exp.Div,
 377        TokenType.STAR: exp.Mul,
 378    }
 379
 380    TIMESTAMPS = {
 381        TokenType.TIME,
 382        TokenType.TIMESTAMP,
 383        TokenType.TIMESTAMPTZ,
 384        TokenType.TIMESTAMPLTZ,
 385    }
 386
 387    SET_OPERATIONS = {
 388        TokenType.UNION,
 389        TokenType.INTERSECT,
 390        TokenType.EXCEPT,
 391    }
 392
 393    JOIN_SIDES = {
 394        TokenType.LEFT,
 395        TokenType.RIGHT,
 396        TokenType.FULL,
 397    }
 398
 399    JOIN_KINDS = {
 400        TokenType.INNER,
 401        TokenType.OUTER,
 402        TokenType.CROSS,
 403        TokenType.SEMI,
 404        TokenType.ANTI,
 405    }
 406
 407    LAMBDAS = {
 408        TokenType.ARROW: lambda self, expressions: self.expression(
 409            exp.Lambda,
 410            this=self._replace_lambda(
 411                self._parse_conjunction(),
 412                {node.name for node in expressions},
 413            ),
 414            expressions=expressions,
 415        ),
 416        TokenType.FARROW: lambda self, expressions: self.expression(
 417            exp.Kwarg,
 418            this=exp.Var(this=expressions[0].name),
 419            expression=self._parse_conjunction(),
 420        ),
 421    }
 422
 423    COLUMN_OPERATORS = {
 424        TokenType.DOT: None,
 425        TokenType.DCOLON: lambda self, this, to: self.expression(
 426            exp.Cast if self.STRICT_CAST else exp.TryCast,
 427            this=this,
 428            to=to,
 429        ),
 430        TokenType.ARROW: lambda self, this, path: self.expression(
 431            exp.JSONExtract,
 432            this=this,
 433            expression=path,
 434        ),
 435        TokenType.DARROW: lambda self, this, path: self.expression(
 436            exp.JSONExtractScalar,
 437            this=this,
 438            expression=path,
 439        ),
 440        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 441            exp.JSONBExtract,
 442            this=this,
 443            expression=path,
 444        ),
 445        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 446            exp.JSONBExtractScalar,
 447            this=this,
 448            expression=path,
 449        ),
 450        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 451            exp.JSONBContains,
 452            this=this,
 453            expression=key,
 454        ),
 455    }
 456
 457    EXPRESSION_PARSERS = {
 458        exp.Column: lambda self: self._parse_column(),
 459        exp.DataType: lambda self: self._parse_types(),
 460        exp.From: lambda self: self._parse_from(),
 461        exp.Group: lambda self: self._parse_group(),
 462        exp.Identifier: lambda self: self._parse_id_var(),
 463        exp.Lateral: lambda self: self._parse_lateral(),
 464        exp.Join: lambda self: self._parse_join(),
 465        exp.Order: lambda self: self._parse_order(),
 466        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 467        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 468        exp.Lambda: lambda self: self._parse_lambda(),
 469        exp.Limit: lambda self: self._parse_limit(),
 470        exp.Offset: lambda self: self._parse_offset(),
 471        exp.TableAlias: lambda self: self._parse_table_alias(),
 472        exp.Table: lambda self: self._parse_table(),
 473        exp.Condition: lambda self: self._parse_conjunction(),
 474        exp.Expression: lambda self: self._parse_statement(),
 475        exp.Properties: lambda self: self._parse_properties(),
 476        exp.Where: lambda self: self._parse_where(),
 477        exp.Ordered: lambda self: self._parse_ordered(),
 478        exp.Having: lambda self: self._parse_having(),
 479        exp.With: lambda self: self._parse_with(),
 480        exp.Window: lambda self: self._parse_named_window(),
 481        exp.Qualify: lambda self: self._parse_qualify(),
 482        exp.Returning: lambda self: self._parse_returning(),
 483        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 484    }
 485
 486    STATEMENT_PARSERS = {
 487        TokenType.ALTER: lambda self: self._parse_alter(),
 488        TokenType.BEGIN: lambda self: self._parse_transaction(),
 489        TokenType.CACHE: lambda self: self._parse_cache(),
 490        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 491        TokenType.COMMENT: lambda self: self._parse_comment(),
 492        TokenType.CREATE: lambda self: self._parse_create(),
 493        TokenType.DELETE: lambda self: self._parse_delete(),
 494        TokenType.DESC: lambda self: self._parse_describe(),
 495        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 496        TokenType.DROP: lambda self: self._parse_drop(),
 497        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 498        TokenType.INSERT: lambda self: self._parse_insert(),
 499        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 500        TokenType.MERGE: lambda self: self._parse_merge(),
 501        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 502        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 503        TokenType.SET: lambda self: self._parse_set(),
 504        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 505        TokenType.UPDATE: lambda self: self._parse_update(),
 506        TokenType.USE: lambda self: self.expression(
 507            exp.Use,
 508            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 509            and exp.Var(this=self._prev.text),
 510            this=self._parse_table(schema=False),
 511        ),
 512    }
 513
 514    UNARY_PARSERS = {
 515        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 516        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 517        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 518        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 519    }
 520
 521    PRIMARY_PARSERS = {
 522        TokenType.STRING: lambda self, token: self.expression(
 523            exp.Literal, this=token.text, is_string=True
 524        ),
 525        TokenType.NUMBER: lambda self, token: self.expression(
 526            exp.Literal, this=token.text, is_string=False
 527        ),
 528        TokenType.STAR: lambda self, _: self.expression(
 529            exp.Star,
 530            **{"except": self._parse_except(), "replace": self._parse_replace()},
 531        ),
 532        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 533        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 534        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 535        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 536        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 537        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 538        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 539        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 540        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 541    }
 542
 543    PLACEHOLDER_PARSERS = {
 544        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 545        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 546        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 547        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 548        else None,
 549    }
 550
 551    RANGE_PARSERS = {
 552        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 553        TokenType.GLOB: binary_range_parser(exp.Glob),
 554        TokenType.ILIKE: binary_range_parser(exp.ILike),
 555        TokenType.IN: lambda self, this: self._parse_in(this),
 556        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 557        TokenType.IS: lambda self, this: self._parse_is(this),
 558        TokenType.LIKE: binary_range_parser(exp.Like),
 559        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 560        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 561        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 562    }
 563
 564    PROPERTY_PARSERS = {
 565        "AFTER": lambda self: self._parse_afterjournal(
 566            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 567        ),
 568        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 569        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 570        "BEFORE": lambda self: self._parse_journal(
 571            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 572        ),
 573        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 574        "CHARACTER SET": lambda self: self._parse_character_set(),
 575        "CHECKSUM": lambda self: self._parse_checksum(),
 576        "CLUSTER BY": lambda self: self.expression(
 577            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 578        ),
 579        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 580        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 581        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 582            default=self._prev.text.upper() == "DEFAULT"
 583        ),
 584        "DEFINER": lambda self: self._parse_definer(),
 585        "DETERMINISTIC": lambda self: self.expression(
 586            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 587        ),
 588        "DISTKEY": lambda self: self._parse_distkey(),
 589        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 590        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 591        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 592        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 593        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 594        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 595        "FREESPACE": lambda self: self._parse_freespace(),
 596        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 597        "IMMUTABLE": lambda self: self.expression(
 598            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 599        ),
 600        "JOURNAL": lambda self: self._parse_journal(
 601            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 602        ),
 603        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 604        "LIKE": lambda self: self._parse_create_like(),
 605        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 606        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 607        "LOCK": lambda self: self._parse_locking(),
 608        "LOCKING": lambda self: self._parse_locking(),
 609        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 610        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 611        "MAX": lambda self: self._parse_datablocksize(),
 612        "MAXIMUM": lambda self: self._parse_datablocksize(),
 613        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 614            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 615        ),
 616        "MIN": lambda self: self._parse_datablocksize(),
 617        "MINIMUM": lambda self: self._parse_datablocksize(),
 618        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 619        "NO": lambda self: self._parse_noprimaryindex(),
 620        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 621        "ON": lambda self: self._parse_oncommit(),
 622        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 623        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 624        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 625        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 626        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 627        "RETURNS": lambda self: self._parse_returns(),
 628        "ROW": lambda self: self._parse_row(),
 629        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 630        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 631        "SETTINGS": lambda self: self.expression(
 632            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 633        ),
 634        "SORTKEY": lambda self: self._parse_sortkey(),
 635        "STABLE": lambda self: self.expression(
 636            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 637        ),
 638        "STORED": lambda self: self._parse_stored(),
 639        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 640        "TEMP": lambda self: self._parse_temporary(global_=False),
 641        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 642        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 643        "TTL": lambda self: self._parse_ttl(),
 644        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 645        "VOLATILE": lambda self: self._parse_volatile_property(),
 646        "WITH": lambda self: self._parse_with_property(),
 647    }
 648
 649    CONSTRAINT_PARSERS = {
 650        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 651        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 652        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 653        "CHARACTER SET": lambda self: self.expression(
 654            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 655        ),
 656        "CHECK": lambda self: self.expression(
 657            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 658        ),
 659        "COLLATE": lambda self: self.expression(
 660            exp.CollateColumnConstraint, this=self._parse_var()
 661        ),
 662        "COMMENT": lambda self: self.expression(
 663            exp.CommentColumnConstraint, this=self._parse_string()
 664        ),
 665        "COMPRESS": lambda self: self._parse_compress(),
 666        "DEFAULT": lambda self: self.expression(
 667            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 668        ),
 669        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 670        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 671        "FORMAT": lambda self: self.expression(
 672            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 673        ),
 674        "GENERATED": lambda self: self._parse_generated_as_identity(),
 675        "IDENTITY": lambda self: self._parse_auto_increment(),
 676        "INLINE": lambda self: self._parse_inline(),
 677        "LIKE": lambda self: self._parse_create_like(),
 678        "NOT": lambda self: self._parse_not_constraint(),
 679        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 680        "ON": lambda self: self._match(TokenType.UPDATE)
 681        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 682        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 683        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 684        "REFERENCES": lambda self: self._parse_references(match=False),
 685        "TITLE": lambda self: self.expression(
 686            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 687        ),
 688        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 689        "UNIQUE": lambda self: self._parse_unique(),
 690        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 691    }
 692
 693    ALTER_PARSERS = {
 694        "ADD": lambda self: self._parse_alter_table_add(),
 695        "ALTER": lambda self: self._parse_alter_table_alter(),
 696        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 697        "DROP": lambda self: self._parse_alter_table_drop(),
 698        "RENAME": lambda self: self._parse_alter_table_rename(),
 699    }
 700
 701    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 702
 703    NO_PAREN_FUNCTION_PARSERS = {
 704        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 705        TokenType.CASE: lambda self: self._parse_case(),
 706        TokenType.IF: lambda self: self._parse_if(),
 707        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 708            exp.NextValueFor,
 709            this=self._parse_column(),
 710            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 711        ),
 712    }
 713
 714    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 715        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 716        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 717        "DECODE": lambda self: self._parse_decode(),
 718        "EXTRACT": lambda self: self._parse_extract(),
 719        "JSON_OBJECT": lambda self: self._parse_json_object(),
 720        "LOG": lambda self: self._parse_logarithm(),
 721        "MATCH": lambda self: self._parse_match_against(),
 722        "OPENJSON": lambda self: self._parse_open_json(),
 723        "POSITION": lambda self: self._parse_position(),
 724        "STRING_AGG": lambda self: self._parse_string_agg(),
 725        "SUBSTRING": lambda self: self._parse_substring(),
 726        "TRIM": lambda self: self._parse_trim(),
 727        "TRY_CAST": lambda self: self._parse_cast(False),
 728        "TRY_CONVERT": lambda self: self._parse_convert(False),
 729    }
 730
 731    QUERY_MODIFIER_PARSERS = {
 732        "joins": lambda self: list(iter(self._parse_join, None)),
 733        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 734        "match": lambda self: self._parse_match_recognize(),
 735        "where": lambda self: self._parse_where(),
 736        "group": lambda self: self._parse_group(),
 737        "having": lambda self: self._parse_having(),
 738        "qualify": lambda self: self._parse_qualify(),
 739        "windows": lambda self: self._parse_window_clause(),
 740        "order": lambda self: self._parse_order(),
 741        "limit": lambda self: self._parse_limit(),
 742        "offset": lambda self: self._parse_offset(),
 743        "locks": lambda self: self._parse_locks(),
 744        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 745    }
 746
    # SET statement sub-parsers, keyed by the keyword following SET. The
    # metaclass builds `_set_trie` from these keys (see _Parser.__new__).
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement sub-parsers; empty by default. The metaclass builds
    # `_show_trie` from these keys, so subclasses only need to extend the dict.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Hooks for parsing typed literals, keyed by data type; empty by default.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that can carry trailing query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted transaction kinds in BEGIN <kind> [TRANSACTION].
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Accepted characteristics in SET TRANSACTION <characteristic>.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Accepted conflict actions in INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Accepted kinds inside a CLONE ... AT|BEFORE (<kind> => <expr>) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Tokens usable as a window alias; ROWS is excluded since it starts a frame spec.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    # Tokens that can introduce an ADD ... constraint clause.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect toggles, overridden by subclasses:
    # passed as the strict flag when parsing CAST/CONVERT (see FUNCTION_PARSERS).
    STRICT_CAST = True

    # whether CONVERT takes its type argument first — TODO confirm against _parse_convert.
    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # LOG argument order / single-argument semantics (consumed by _parse_logarithm).
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
 789
    # Fix the instance attribute set: parsers are instantiated often, and
    # __slots__ avoids a per-instance __dict__.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 809
 810    def __init__(
 811        self,
 812        error_level: t.Optional[ErrorLevel] = None,
 813        error_message_context: int = 100,
 814        index_offset: int = 0,
 815        unnest_column_only: bool = False,
 816        alias_post_tablesample: bool = False,
 817        max_errors: int = 3,
 818        null_ordering: t.Optional[str] = None,
 819    ):
 820        self.error_level = error_level or ErrorLevel.IMMEDIATE
 821        self.error_message_context = error_message_context
 822        self.index_offset = index_offset
 823        self.unnest_column_only = unnest_column_only
 824        self.alias_post_tablesample = alias_post_tablesample
 825        self.max_errors = max_errors
 826        self.null_ordering = null_ordering
 827        self.reset()
 828
 829    def reset(self):
 830        self.sql = ""
 831        self.errors = []
 832        self._tokens = []
 833        self._index = 0
 834        self._curr = None
 835        self._next = None
 836        self._prev = None
 837        self._prev_comments = None
 838
 839    def parse(
 840        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 841    ) -> t.List[t.Optional[exp.Expression]]:
 842        """
 843        Parses a list of tokens and returns a list of syntax trees, one tree
 844        per parsed SQL statement.
 845
 846        Args:
 847            raw_tokens: the list of tokens.
 848            sql: the original SQL string, used to produce helpful debug messages.
 849
 850        Returns:
 851            The list of syntax trees.
 852        """
 853        return self._parse(
 854            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 855        )
 856
 857    def parse_into(
 858        self,
 859        expression_types: exp.IntoType,
 860        raw_tokens: t.List[Token],
 861        sql: t.Optional[str] = None,
 862    ) -> t.List[t.Optional[exp.Expression]]:
 863        """
 864        Parses a list of tokens into a given Expression type. If a collection of Expression
 865        types is given instead, this method will try to parse the token list into each one
 866        of them, stopping at the first for which the parsing succeeds.
 867
 868        Args:
 869            expression_types: the expression type(s) to try and parse the token list into.
 870            raw_tokens: the list of tokens.
 871            sql: the original SQL string, used to produce helpful debug messages.
 872
 873        Returns:
 874            The target Expression.
 875        """
 876        errors = []
 877        for expression_type in ensure_collection(expression_types):
 878            parser = self.EXPRESSION_PARSERS.get(expression_type)
 879            if not parser:
 880                raise TypeError(f"No parser registered for {expression_type}")
 881            try:
 882                return self._parse(parser, raw_tokens, sql)
 883            except ParseError as e:
 884                e.errors[0]["into_expression"] = expression_type
 885                errors.append(e)
 886        raise ParseError(
 887            f"Failed to parse into {expression_types}",
 888            errors=merge_errors(errors),
 889        ) from errors[-1]
 890
 891    def _parse(
 892        self,
 893        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 894        raw_tokens: t.List[Token],
 895        sql: t.Optional[str] = None,
 896    ) -> t.List[t.Optional[exp.Expression]]:
 897        self.reset()
 898        self.sql = sql or ""
 899        total = len(raw_tokens)
 900        chunks: t.List[t.List[Token]] = [[]]
 901
 902        for i, token in enumerate(raw_tokens):
 903            if token.token_type == TokenType.SEMICOLON:
 904                if i < total - 1:
 905                    chunks.append([])
 906            else:
 907                chunks[-1].append(token)
 908
 909        expressions = []
 910
 911        for tokens in chunks:
 912            self._index = -1
 913            self._tokens = tokens
 914            self._advance()
 915
 916            expressions.append(parse_method(self))
 917
 918            if self._index < len(self._tokens):
 919                self.raise_error("Invalid expression / Unexpected token")
 920
 921            self.check_errors()
 922
 923        return expressions
 924
 925    def check_errors(self) -> None:
 926        """
 927        Logs or raises any found errors, depending on the chosen error level setting.
 928        """
 929        if self.error_level == ErrorLevel.WARN:
 930            for error in self.errors:
 931                logger.error(str(error))
 932        elif self.error_level == ErrorLevel.RAISE and self.errors:
 933            raise ParseError(
 934                concat_messages(self.errors, self.max_errors),
 935                errors=merge_errors(self.errors),
 936            )
 937
 938    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 939        """
 940        Appends an error in the list of recorded errors or raises it, depending on the chosen
 941        error level setting.
 942        """
 943        token = token or self._curr or self._prev or Token.string("")
 944        start = token.start
 945        end = token.end + 1
 946        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 947        highlight = self.sql[start:end]
 948        end_context = self.sql[end : end + self.error_message_context]
 949
 950        error = ParseError.new(
 951            f"{message}. Line {token.line}, Col: {token.col}.\n"
 952            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 953            description=message,
 954            line=token.line,
 955            col=token.col,
 956            start_context=start_context,
 957            highlight=highlight,
 958            end_context=end_context,
 959        )
 960
 961        if self.error_level == ErrorLevel.IMMEDIATE:
 962            raise error
 963
 964        self.errors.append(error)
 965
 966    def expression(
 967        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 968    ) -> E:
 969        """
 970        Creates a new, validated Expression.
 971
 972        Args:
 973            exp_class: the expression class to instantiate.
 974            comments: an optional list of comments to attach to the expression.
 975            kwargs: the arguments to set for the expression along with their respective values.
 976
 977        Returns:
 978            The target expression.
 979        """
 980        instance = exp_class(**kwargs)
 981        instance.add_comments(comments) if comments else self._add_comments(instance)
 982        self.validate_expression(instance)
 983        return instance
 984
 985    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 986        if expression and self._prev_comments:
 987            expression.add_comments(self._prev_comments)
 988            self._prev_comments = None
 989
 990    def validate_expression(
 991        self, expression: exp.Expression, args: t.Optional[t.List] = None
 992    ) -> None:
 993        """
 994        Validates an already instantiated expression, making sure that all its mandatory arguments
 995        are set.
 996
 997        Args:
 998            expression: the expression to validate.
 999            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1000        """
1001        if self.error_level == ErrorLevel.IGNORE:
1002            return
1003
1004        for error_message in expression.error_messages(args):
1005            self.raise_error(error_message)
1006
1007    def _find_sql(self, start: Token, end: Token) -> str:
1008        return self.sql[start.start : end.end + 1]
1009
1010    def _advance(self, times: int = 1) -> None:
1011        self._index += times
1012        self._curr = seq_get(self._tokens, self._index)
1013        self._next = seq_get(self._tokens, self._index + 1)
1014        if self._index > 0:
1015            self._prev = self._tokens[self._index - 1]
1016            self._prev_comments = self._prev.comments
1017        else:
1018            self._prev = None
1019            self._prev_comments = None
1020
1021    def _retreat(self, index: int) -> None:
1022        if index != self._index:
1023            self._advance(index - self._index)
1024
    def _parse_command(self) -> exp.Command:
        """Wraps the previously matched token plus an optional trailing string into a generic Command."""
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1027
1028    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1029        start = self._prev
1030        exists = self._parse_exists() if allow_exists else None
1031
1032        self._match(TokenType.ON)
1033
1034        kind = self._match_set(self.CREATABLES) and self._prev
1035
1036        if not kind:
1037            return self._parse_as_command(start)
1038
1039        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1040            this = self._parse_user_defined_function(kind=kind.token_type)
1041        elif kind.token_type == TokenType.TABLE:
1042            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1043        elif kind.token_type == TokenType.COLUMN:
1044            this = self._parse_column()
1045        else:
1046            this = self._parse_id_var()
1047
1048        self._match(TokenType.IS)
1049
1050        return self.expression(
1051            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1052        )
1053
1054    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1055    def _parse_ttl(self) -> exp.Expression:
1056        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1057            this = self._parse_bitwise()
1058
1059            if self._match_text_seq("DELETE"):
1060                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1061            if self._match_text_seq("RECOMPRESS"):
1062                return self.expression(
1063                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1064                )
1065            if self._match_text_seq("TO", "DISK"):
1066                return self.expression(
1067                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1068                )
1069            if self._match_text_seq("TO", "VOLUME"):
1070                return self.expression(
1071                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1072                )
1073
1074            return this
1075
1076        expressions = self._parse_csv(_parse_ttl_action)
1077        where = self._parse_where()
1078        group = self._parse_group()
1079
1080        aggregates = None
1081        if group and self._match(TokenType.SET):
1082            aggregates = self._parse_csv(self._parse_set_item)
1083
1084        return self.expression(
1085            exp.MergeTreeTTL,
1086            expressions=expressions,
1087            where=where,
1088            group=group,
1089            aggregates=aggregates,
1090        )
1091
1092    def _parse_statement(self) -> t.Optional[exp.Expression]:
1093        if self._curr is None:
1094            return None
1095
1096        if self._match_set(self.STATEMENT_PARSERS):
1097            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1098
1099        if self._match_set(Tokenizer.COMMANDS):
1100            return self._parse_command()
1101
1102        expression = self._parse_expression()
1103        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1104        return self._parse_query_modifiers(expression)
1105
1106    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1107        start = self._prev
1108        temporary = self._match(TokenType.TEMPORARY)
1109        materialized = self._match(TokenType.MATERIALIZED)
1110        kind = self._match_set(self.CREATABLES) and self._prev.text
1111        if not kind:
1112            return self._parse_as_command(start)
1113
1114        return self.expression(
1115            exp.Drop,
1116            exists=self._parse_exists(),
1117            this=self._parse_table(schema=True),
1118            kind=kind,
1119            temporary=temporary,
1120            materialized=materialized,
1121            cascade=self._match(TokenType.CASCADE),
1122            constraints=self._match_text_seq("CONSTRAINTS"),
1123            purge=self._match_text_seq("PURGE"),
1124        )
1125
1126    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1127        return (
1128            self._match(TokenType.IF)
1129            and (not not_ or self._match(TokenType.NOT))
1130            and self._match(TokenType.EXISTS)
1131        )
1132
1133    def _parse_create(self) -> t.Optional[exp.Expression]:
1134        start = self._prev
1135        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1136            TokenType.OR, TokenType.REPLACE
1137        )
1138        unique = self._match(TokenType.UNIQUE)
1139
1140        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1141            self._match(TokenType.TABLE)
1142
1143        properties = None
1144        create_token = self._match_set(self.CREATABLES) and self._prev
1145
1146        if not create_token:
1147            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1148            create_token = self._match_set(self.CREATABLES) and self._prev
1149
1150            if not properties or not create_token:
1151                return self._parse_as_command(start)
1152
1153        exists = self._parse_exists(not_=True)
1154        this = None
1155        expression = None
1156        indexes = None
1157        no_schema_binding = None
1158        begin = None
1159        clone = None
1160
1161        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1162            this = self._parse_user_defined_function(kind=create_token.token_type)
1163            temp_properties = self._parse_properties()
1164            if properties and temp_properties:
1165                properties.expressions.extend(temp_properties.expressions)
1166            elif temp_properties:
1167                properties = temp_properties
1168
1169            self._match(TokenType.ALIAS)
1170            begin = self._match(TokenType.BEGIN)
1171            return_ = self._match_text_seq("RETURN")
1172            expression = self._parse_statement()
1173
1174            if return_:
1175                expression = self.expression(exp.Return, this=expression)
1176        elif create_token.token_type == TokenType.INDEX:
1177            this = self._parse_index()
1178        elif create_token.token_type in self.DB_CREATABLES:
1179            table_parts = self._parse_table_parts(schema=True)
1180
1181            # exp.Properties.Location.POST_NAME
1182            if self._match(TokenType.COMMA):
1183                temp_properties = self._parse_properties(before=True)
1184                if properties and temp_properties:
1185                    properties.expressions.extend(temp_properties.expressions)
1186                elif temp_properties:
1187                    properties = temp_properties
1188
1189            this = self._parse_schema(this=table_parts)
1190
1191            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1192            temp_properties = self._parse_properties()
1193            if properties and temp_properties:
1194                properties.expressions.extend(temp_properties.expressions)
1195            elif temp_properties:
1196                properties = temp_properties
1197
1198            self._match(TokenType.ALIAS)
1199
1200            # exp.Properties.Location.POST_ALIAS
1201            if not (
1202                self._match(TokenType.SELECT, advance=False)
1203                or self._match(TokenType.WITH, advance=False)
1204                or self._match(TokenType.L_PAREN, advance=False)
1205            ):
1206                temp_properties = self._parse_properties()
1207                if properties and temp_properties:
1208                    properties.expressions.extend(temp_properties.expressions)
1209                elif temp_properties:
1210                    properties = temp_properties
1211
1212            expression = self._parse_ddl_select()
1213
1214            if create_token.token_type == TokenType.TABLE:
1215                indexes = []
1216                while True:
1217                    index = self._parse_create_table_index()
1218
1219                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1220                    temp_properties = self._parse_properties()
1221                    if properties and temp_properties:
1222                        properties.expressions.extend(temp_properties.expressions)
1223                    elif temp_properties:
1224                        properties = temp_properties
1225
1226                    if not index:
1227                        break
1228                    else:
1229                        self._match(TokenType.COMMA)
1230                        indexes.append(index)
1231            elif create_token.token_type == TokenType.VIEW:
1232                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1233                    no_schema_binding = True
1234
1235            if self._match_text_seq("CLONE"):
1236                clone = self._parse_table(schema=True)
1237                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1238                clone_kind = (
1239                    self._match(TokenType.L_PAREN)
1240                    and self._match_texts(self.CLONE_KINDS)
1241                    and self._prev.text.upper()
1242                )
1243                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1244                self._match(TokenType.R_PAREN)
1245                clone = self.expression(
1246                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1247                )
1248
1249        return self.expression(
1250            exp.Create,
1251            this=this,
1252            kind=create_token.text,
1253            replace=replace,
1254            unique=unique,
1255            expression=expression,
1256            exists=exists,
1257            properties=properties,
1258            indexes=indexes,
1259            no_schema_binding=no_schema_binding,
1260            begin=begin,
1261            clone=clone,
1262        )
1263
1264    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1265        self._match(TokenType.COMMA)
1266
1267        # parsers look to _prev for no/dual/default, so need to consume first
1268        self._match_text_seq("NO")
1269        self._match_text_seq("DUAL")
1270        self._match_text_seq("DEFAULT")
1271
1272        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1273            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1274
1275        return None
1276
1277    def _parse_property(self) -> t.Optional[exp.Expression]:
1278        if self._match_texts(self.PROPERTY_PARSERS):
1279            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1280
1281        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1282            return self._parse_character_set(default=True)
1283
1284        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1285            return self._parse_sortkey(compound=True)
1286
1287        if self._match_text_seq("SQL", "SECURITY"):
1288            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1289
1290        assignment = self._match_pair(
1291            TokenType.VAR, TokenType.EQ, advance=False
1292        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1293
1294        if assignment:
1295            key = self._parse_var_or_string()
1296            self._match(TokenType.EQ)
1297            return self.expression(exp.Property, this=key, value=self._parse_column())
1298
1299        return None
1300
1301    def _parse_stored(self) -> exp.Expression:
1302        self._match(TokenType.ALIAS)
1303
1304        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1305        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1306
1307        return self.expression(
1308            exp.FileFormatProperty,
1309            this=self.expression(
1310                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1311            )
1312            if input_format or output_format
1313            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1314        )
1315
    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Consumes optional `=` / alias tokens and wraps the following field in `exp_class`."""
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())
1320
1321    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1322        properties = []
1323
1324        while True:
1325            if before:
1326                identified_property = self._parse_property_before()
1327            else:
1328                identified_property = self._parse_property()
1329
1330            if not identified_property:
1331                break
1332            for p in ensure_list(identified_property):
1333                properties.append(p)
1334
1335        if properties:
1336            return self.expression(exp.Properties, expressions=properties)
1337
1338        return None
1339
1340    def _parse_fallback(self, no=False) -> exp.Expression:
1341        self._match_text_seq("FALLBACK")
1342        return self.expression(
1343            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1344        )
1345
1346    def _parse_volatile_property(self) -> exp.Expression:
1347        if self._index >= 2:
1348            pre_volatile_token = self._tokens[self._index - 2]
1349        else:
1350            pre_volatile_token = None
1351
1352        if pre_volatile_token and pre_volatile_token.token_type in (
1353            TokenType.CREATE,
1354            TokenType.REPLACE,
1355            TokenType.UNIQUE,
1356        ):
1357            return exp.VolatileProperty()
1358
1359        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1360
1361    def _parse_with_property(
1362        self,
1363    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1364        self._match(TokenType.WITH)
1365        if self._match(TokenType.L_PAREN, advance=False):
1366            return self._parse_wrapped_csv(self._parse_property)
1367
1368        if self._match_text_seq("JOURNAL"):
1369            return self._parse_withjournaltable()
1370
1371        if self._match_text_seq("DATA"):
1372            return self._parse_withdata(no=False)
1373        elif self._match_text_seq("NO", "DATA"):
1374            return self._parse_withdata(no=True)
1375
1376        if not self._next:
1377            return None
1378
1379        return self._parse_withisolatedloading()
1380
1381    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1382    def _parse_definer(self) -> t.Optional[exp.Expression]:
1383        self._match(TokenType.EQ)
1384
1385        user = self._parse_id_var()
1386        self._match(TokenType.PARAMETER)
1387        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1388
1389        if not user or not host:
1390            return None
1391
1392        return exp.DefinerProperty(this=f"{user}@{host}")
1393
    def _parse_withjournaltable(self) -> exp.Expression:
        """Parses [TABLE] [=] <table> after WITH JOURNAL (consumed by the caller)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1398
    def _parse_log(self, no=False) -> exp.Expression:
        """Parses a [NO] LOG property (the NO prefix is consumed by the caller)."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1402
    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Parses a [NO|DUAL] [BEFORE] JOURNAL property; NO/DUAL are consumed by the caller."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1407
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parses a [NOT] [LOCAL] AFTER JOURNAL property; leading NO/DUAL are consumed by the caller."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1413
1414    def _parse_checksum(self) -> exp.Expression:
1415        self._match_text_seq("CHECKSUM")
1416        self._match(TokenType.EQ)
1417
1418        on = None
1419        if self._match(TokenType.ON):
1420            on = True
1421        elif self._match_text_seq("OFF"):
1422            on = False
1423        default = self._match(TokenType.DEFAULT)
1424
1425        return self.expression(
1426            exp.ChecksumProperty,
1427            on=on,
1428            default=default,
1429        )
1430
1431    def _parse_freespace(self) -> exp.Expression:
1432        self._match_text_seq("FREESPACE")
1433        self._match(TokenType.EQ)
1434        return self.expression(
1435            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1436        )
1437
1438    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1439        self._match_text_seq("MERGEBLOCKRATIO")
1440        if self._match(TokenType.EQ):
1441            return self.expression(
1442                exp.MergeBlockRatioProperty,
1443                this=self._parse_number(),
1444                percent=self._match(TokenType.PERCENT),
1445            )
1446        else:
1447            return self.expression(
1448                exp.MergeBlockRatioProperty,
1449                no=no,
1450                default=default,
1451            )
1452
1453    def _parse_datablocksize(self, default=None) -> exp.Expression:
1454        if default:
1455            self._match_text_seq("DATABLOCKSIZE")
1456            return self.expression(exp.DataBlocksizeProperty, default=True)
1457        elif self._match_texts(("MIN", "MINIMUM")):
1458            self._match_text_seq("DATABLOCKSIZE")
1459            return self.expression(exp.DataBlocksizeProperty, min=True)
1460        elif self._match_texts(("MAX", "MAXIMUM")):
1461            self._match_text_seq("DATABLOCKSIZE")
1462            return self.expression(exp.DataBlocksizeProperty, min=False)
1463
1464        self._match_text_seq("DATABLOCKSIZE")
1465        self._match(TokenType.EQ)
1466        size = self._parse_number()
1467        units = None
1468        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1469            units = self._prev.text
1470        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1471
1472    def _parse_blockcompression(self) -> exp.Expression:
1473        self._match_text_seq("BLOCKCOMPRESSION")
1474        self._match(TokenType.EQ)
1475        always = self._match_text_seq("ALWAYS")
1476        manual = self._match_text_seq("MANUAL")
1477        never = self._match_text_seq("NEVER")
1478        default = self._match_text_seq("DEFAULT")
1479        autotemp = None
1480        if self._match_text_seq("AUTOTEMP"):
1481            autotemp = self._parse_schema()
1482
1483        return self.expression(
1484            exp.BlockCompressionProperty,
1485            always=always,
1486            manual=manual,
1487            never=never,
1488            default=default,
1489            autotemp=autotemp,
1490        )
1491
1492    def _parse_withisolatedloading(self) -> exp.Expression:
1493        no = self._match_text_seq("NO")
1494        concurrent = self._match_text_seq("CONCURRENT")
1495        self._match_text_seq("ISOLATED", "LOADING")
1496        for_all = self._match_text_seq("FOR", "ALL")
1497        for_insert = self._match_text_seq("FOR", "INSERT")
1498        for_none = self._match_text_seq("FOR", "NONE")
1499        return self.expression(
1500            exp.IsolatedLoadingProperty,
1501            no=no,
1502            concurrent=concurrent,
1503            for_all=for_all,
1504            for_insert=for_insert,
1505            for_none=for_none,
1506        )
1507
1508    def _parse_locking(self) -> exp.Expression:
1509        if self._match(TokenType.TABLE):
1510            kind = "TABLE"
1511        elif self._match(TokenType.VIEW):
1512            kind = "VIEW"
1513        elif self._match(TokenType.ROW):
1514            kind = "ROW"
1515        elif self._match_text_seq("DATABASE"):
1516            kind = "DATABASE"
1517        else:
1518            kind = None
1519
1520        if kind in ("DATABASE", "TABLE", "VIEW"):
1521            this = self._parse_table_parts()
1522        else:
1523            this = None
1524
1525        if self._match(TokenType.FOR):
1526            for_or_in = "FOR"
1527        elif self._match(TokenType.IN):
1528            for_or_in = "IN"
1529        else:
1530            for_or_in = None
1531
1532        if self._match_text_seq("ACCESS"):
1533            lock_type = "ACCESS"
1534        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1535            lock_type = "EXCLUSIVE"
1536        elif self._match_text_seq("SHARE"):
1537            lock_type = "SHARE"
1538        elif self._match_text_seq("READ"):
1539            lock_type = "READ"
1540        elif self._match_text_seq("WRITE"):
1541            lock_type = "WRITE"
1542        elif self._match_text_seq("CHECKSUM"):
1543            lock_type = "CHECKSUM"
1544        else:
1545            lock_type = None
1546
1547        override = self._match_text_seq("OVERRIDE")
1548
1549        return self.expression(
1550            exp.LockingProperty,
1551            this=this,
1552            kind=kind,
1553            for_or_in=for_or_in,
1554            lock_type=lock_type,
1555            override=override,
1556        )
1557
1558    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1559        if self._match(TokenType.PARTITION_BY):
1560            return self._parse_csv(self._parse_conjunction)
1561        return []
1562
1563    def _parse_partitioned_by(self) -> exp.Expression:
1564        self._match(TokenType.EQ)
1565        return self.expression(
1566            exp.PartitionedByProperty,
1567            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1568        )
1569
1570    def _parse_withdata(self, no=False) -> exp.Expression:
1571        if self._match_text_seq("AND", "STATISTICS"):
1572            statistics = True
1573        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1574            statistics = False
1575        else:
1576            statistics = None
1577
1578        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1579
1580    def _parse_noprimaryindex(self) -> exp.Expression:
1581        self._match_text_seq("PRIMARY", "INDEX")
1582        return exp.NoPrimaryIndexProperty()
1583
1584    def _parse_oncommit(self) -> exp.Expression:
1585        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1586            return exp.OnCommitProperty()
1587        return exp.OnCommitProperty(delete=self._match_text_seq("COMMIT", "DELETE", "ROWS"))
1588
1589    def _parse_distkey(self) -> exp.Expression:
1590        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1591
1592    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1593        table = self._parse_table(schema=True)
1594        options = []
1595        while self._match_texts(("INCLUDING", "EXCLUDING")):
1596            this = self._prev.text.upper()
1597            id_var = self._parse_id_var()
1598
1599            if not id_var:
1600                return None
1601
1602            options.append(
1603                self.expression(
1604                    exp.Property,
1605                    this=this,
1606                    value=exp.Var(this=id_var.this.upper()),
1607                )
1608            )
1609        return self.expression(exp.LikeProperty, this=table, expressions=options)
1610
1611    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1612        return self.expression(
1613            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1614        )
1615
1616    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1617        self._match(TokenType.EQ)
1618        return self.expression(
1619            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1620        )
1621
    def _parse_returns(self) -> exp.Expression:
        """Parse the RETURNS clause of a function/procedure definition.

        Handles three shapes: `RETURNS TABLE<col type, ...>` (angle-bracket
        struct syntax), `RETURNS TABLE (...)`, and a plain `RETURNS <type>`.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<...>: parse struct fields and require the closing >.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (...) or bare TABLE.
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar type; may be None when no type follows.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1641
1642    def _parse_temporary(self, global_=False) -> exp.Expression:
1643        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1644        return self.expression(exp.TemporaryProperty, global_=global_)
1645
1646    def _parse_describe(self) -> exp.Expression:
1647        kind = self._match_set(self.CREATABLES) and self._prev.text
1648        this = self._parse_table()
1649
1650        return self.expression(exp.Describe, this=this, kind=kind)
1651
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (the INSERT keyword is already consumed).

        Supports `INSERT [OVERWRITE] [LOCAL] DIRECTORY <path> ...` as well as
        the regular `INSERT [OR <alt>] [INTO] [TABLE] <table> ...` form.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Target is a directory rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE; the allowed words come from the dialect.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters — each value consumes the next
        # clause from the token stream.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1683
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an `ON CONFLICT ...` or `ON DUPLICATE KEY ...` clause.

        Returns None when neither form is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING, or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1717
1718    def _parse_returning(self) -> t.Optional[exp.Expression]:
1719        if not self._match(TokenType.RETURNING):
1720            return None
1721
1722        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1723
1724    def _parse_row(self) -> t.Optional[exp.Expression]:
1725        if not self._match(TokenType.FORMAT):
1726            return None
1727        return self._parse_row_format()
1728
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse `[ROW FORMAT] {SERDE <string> | DELIMITED ...}`.

        With `match_row=True`, the leading ROW FORMAT token pair is required
        (returning None if absent); otherwise parsing starts at SERDE/DELIMITED.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each optional DELIMITED sub-clause contributes one property; the
        # clauses must appear in this order to be recognized.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1754
    def _parse_load_data(self) -> exp.Expression:
        """Parse `LOAD DATA [LOCAL] INPATH <path> [OVERWRITE] INTO TABLE <t> ...`."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # NOTE: keyword-argument order matters — table, partition, INPUTFORMAT
        # and SERDE are consumed from the token stream in this order.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1772
1773    def _parse_delete(self) -> exp.Expression:
1774        self._match(TokenType.FROM)
1775
1776        return self.expression(
1777            exp.Delete,
1778            this=self._parse_table(),
1779            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1780            where=self._parse_where(),
1781            returning=self._parse_returning(),
1782        )
1783
1784    def _parse_update(self) -> exp.Expression:
1785        return self.expression(
1786            exp.Update,
1787            **{  # type: ignore
1788                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1789                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1790                "from": self._parse_from(modifiers=True),
1791                "where": self._parse_where(),
1792                "returning": self._parse_returning(),
1793            },
1794        )
1795
1796    def _parse_uncache(self) -> exp.Expression:
1797        if not self._match(TokenType.TABLE):
1798            self.raise_error("Expecting TABLE after UNCACHE")
1799
1800        return self.expression(
1801            exp.Uncache,
1802            exists=self._parse_exists(),
1803            this=self._parse_table(schema=True),
1804        )
1805
1806    def _parse_cache(self) -> exp.Expression:
1807        lazy = self._match(TokenType.LAZY)
1808        self._match(TokenType.TABLE)
1809        table = self._parse_table(schema=True)
1810        options = []
1811
1812        if self._match(TokenType.OPTIONS):
1813            self._match_l_paren()
1814            k = self._parse_string()
1815            self._match(TokenType.EQ)
1816            v = self._parse_string()
1817            options = [k, v]
1818            self._match_r_paren()
1819
1820        self._match(TokenType.ALIAS)
1821        return self.expression(
1822            exp.Cache,
1823            this=table,
1824            lazy=lazy,
1825            options=options,
1826            expression=self._parse_select(nested=True),
1827        )
1828
1829    def _parse_partition(self) -> t.Optional[exp.Expression]:
1830        if not self._match(TokenType.PARTITION):
1831            return None
1832
1833        return self.expression(
1834            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1835        )
1836
1837    def _parse_value(self) -> exp.Expression:
1838        if self._match(TokenType.L_PAREN):
1839            expressions = self._parse_csv(self._parse_conjunction)
1840            self._match_r_paren()
1841            return self.expression(exp.Tuple, expressions=expressions)
1842
1843        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1844        # Source: https://prestodb.io/docs/current/sql/values.html
1845        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1846
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Tries, in order: a WITH-prefixed statement, a SELECT, a parenthesized
        subquery (only when `nested` or `table` is set), or a VALUES clause.
        Returns None when none of these start at the current position.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                # Reached only if raise_error did not raise (lenient error levels).
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT [ON (...)]
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1926
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its CTE list.

        `skip_with_token=True` means the WITH keyword was already consumed.
        Returns None when no WITH clause starts here.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a repeated WITH keyword is
            # tolerated as a separator too (and consumed by the else branch).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1946
1947    def _parse_cte(self) -> exp.Expression:
1948        alias = self._parse_table_alias()
1949        if not alias or not alias.this:
1950            self.raise_error("Expected CTE to have alias")
1951
1952        self._match(TokenType.ALIAS)
1953
1954        return self.expression(
1955            exp.CTE,
1956            this=self._parse_wrapped(self._parse_statement),
1957            alias=alias,
1958        )
1959
1960    def _parse_table_alias(
1961        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1962    ) -> t.Optional[exp.Expression]:
1963        any_token = self._match(TokenType.ALIAS)
1964        alias = (
1965            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1966            or self._parse_string_as_identifier()
1967        )
1968
1969        index = self._index
1970        if self._match(TokenType.L_PAREN):
1971            columns = self._parse_csv(self._parse_function_parameter)
1972            self._match_r_paren() if columns else self._retreat(index)
1973        else:
1974            columns = None
1975
1976        if not alias and not columns:
1977            return None
1978
1979        return self.expression(exp.TableAlias, this=alias, columns=columns)
1980
1981    def _parse_subquery(
1982        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1983    ) -> exp.Expression:
1984        return self.expression(
1985            exp.Subquery,
1986            this=this,
1987            pivots=self._parse_pivots(),
1988            alias=self._parse_table_alias() if parse_alias else None,
1989        )
1990
1991    def _parse_query_modifiers(
1992        self, this: t.Optional[exp.Expression]
1993    ) -> t.Optional[exp.Expression]:
1994        if isinstance(this, self.MODIFIABLES):
1995            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1996                expression = parser(self)
1997
1998                if expression:
1999                    this.set(key, expression)
2000        return this
2001
2002    def _parse_hint(self) -> t.Optional[exp.Expression]:
2003        if self._match(TokenType.HINT):
2004            hints = self._parse_csv(self._parse_function)
2005            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2006                self.raise_error("Expected */ after HINT")
2007            return self.expression(exp.Hint, expressions=hints)
2008
2009        return None
2010
2011    def _parse_into(self) -> t.Optional[exp.Expression]:
2012        if not self._match(TokenType.INTO):
2013            return None
2014
2015        temp = self._match(TokenType.TEMPORARY)
2016        unlogged = self._match(TokenType.UNLOGGED)
2017        self._match(TokenType.TABLE)
2018
2019        return self.expression(
2020            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2021        )
2022
2023    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2024        if not self._match(TokenType.FROM):
2025            return None
2026
2027        comments = self._prev_comments
2028        this = self._parse_table()
2029
2030        return self.expression(
2031            exp.From,
2032            comments=comments,
2033            this=self._parse_query_modifiers(this) if modifiers else this,
2034        )
2035
2036    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2037        if not self._match(TokenType.MATCH_RECOGNIZE):
2038            return None
2039
2040        self._match_l_paren()
2041
2042        partition = self._parse_partition_by()
2043        order = self._parse_order()
2044        measures = (
2045            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2046        )
2047
2048        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2049            rows = exp.Var(this="ONE ROW PER MATCH")
2050        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2051            text = "ALL ROWS PER MATCH"
2052            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2053                text += f" SHOW EMPTY MATCHES"
2054            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2055                text += f" OMIT EMPTY MATCHES"
2056            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2057                text += f" WITH UNMATCHED ROWS"
2058            rows = exp.Var(this=text)
2059        else:
2060            rows = None
2061
2062        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2063            text = "AFTER MATCH SKIP"
2064            if self._match_text_seq("PAST", "LAST", "ROW"):
2065                text += f" PAST LAST ROW"
2066            elif self._match_text_seq("TO", "NEXT", "ROW"):
2067                text += f" TO NEXT ROW"
2068            elif self._match_text_seq("TO", "FIRST"):
2069                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2070            elif self._match_text_seq("TO", "LAST"):
2071                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2072            after = exp.Var(this=text)
2073        else:
2074            after = None
2075
2076        if self._match_text_seq("PATTERN"):
2077            self._match_l_paren()
2078
2079            if not self._curr:
2080                self.raise_error("Expecting )", self._curr)
2081
2082            paren = 1
2083            start = self._curr
2084
2085            while self._curr and paren > 0:
2086                if self._curr.token_type == TokenType.L_PAREN:
2087                    paren += 1
2088                if self._curr.token_type == TokenType.R_PAREN:
2089                    paren -= 1
2090                end = self._prev
2091                self._advance()
2092            if paren > 0:
2093                self.raise_error("Expecting )", self._curr)
2094            pattern = exp.Var(this=self._find_sql(start, end))
2095        else:
2096            pattern = None
2097
2098        define = (
2099            self._parse_csv(
2100                lambda: self.expression(
2101                    exp.Alias,
2102                    alias=self._parse_id_var(any_token=True),
2103                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2104                )
2105            )
2106            if self._match_text_seq("DEFINE")
2107            else None
2108        )
2109
2110        self._match_r_paren()
2111
2112        return self.expression(
2113            exp.MatchRecognize,
2114            partition_by=partition,
2115            order=order,
2116            measures=measures,
2117            rows=rows,
2118            after=after,
2119            pattern=pattern,
2120            define=define,
2121            alias=self._parse_table_alias(),
2122        )
2123
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL, CROSS APPLY, or OUTER APPLY constructs.

        Returns None when none of the introducing keywords are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is represented as an outer lateral.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW f(x) tbl [AS col1, col2, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2165
2166    def _parse_join_side_and_kind(
2167        self,
2168    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2169        return (
2170            self._match(TokenType.NATURAL) and self._prev,
2171            self._match_set(self.JOIN_SIDES) and self._prev,
2172            self._match_set(self.JOIN_KINDS) and self._prev,
2173        )
2174
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause, including comma joins and APPLY variants.

        Returns None when the upcoming tokens don't form a join.
        """
        if self._match(TokenType.COMMA):
            # Implicit (comma) join.
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: undo the side/kind matches before checking the
            # APPLY forms below.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # Join condition: ON <expr> or USING (<ids>).
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2218
2219    def _parse_index(self) -> exp.Expression:
2220        index = self._parse_id_var()
2221        self._match(TokenType.ON)
2222        self._match(TokenType.TABLE)  # hive
2223
2224        return self.expression(
2225            exp.Index,
2226            this=index,
2227            table=self.expression(exp.Table, this=self._parse_id_var()),
2228            columns=self._parse_expression(),
2229        )
2230
2231    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2232        unique = self._match(TokenType.UNIQUE)
2233        primary = self._match_text_seq("PRIMARY")
2234        amp = self._match_text_seq("AMP")
2235        if not self._match(TokenType.INDEX):
2236            return None
2237        index = self._parse_id_var()
2238        columns = None
2239        if self._match(TokenType.L_PAREN, advance=False):
2240            columns = self._parse_wrapped_csv(self._parse_column)
2241        return self.expression(
2242            exp.Index,
2243            this=index,
2244            columns=columns,
2245            unique=unique,
2246            primary=primary,
2247            amp=amp,
2248        )
2249
2250    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2251        return (
2252            (not schema and self._parse_function())
2253            or self._parse_id_var(any_token=False)
2254            or self._parse_string_as_identifier()
2255            or self._parse_placeholder()
2256        )
2257
2258    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2259        catalog = None
2260        db = None
2261        table = self._parse_table_part(schema=schema)
2262
2263        while self._match(TokenType.DOT):
2264            if catalog:
2265                # This allows nesting the table in arbitrarily many dot expressions if needed
2266                table = self.expression(
2267                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2268                )
2269            else:
2270                catalog = db
2271                db = table
2272                table = self._parse_table_part(schema=schema)
2273
2274        if not table:
2275            self.raise_error(f"Expected table name but got {self._curr}")
2276
2277        return self.expression(
2278            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2279        )
2280
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral/unnest/values/subquery, or a plain table
        reference with alias, pivots, hints and TABLESAMPLE.

        With `schema=True`, a schema (column definition list) is parsed instead
        of an aliased table reference.
        """
        # Each special table form short-circuits when it matches.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: whether the alias comes after the TABLESAMPLE clause.
        # Exactly one of the two table_sample assignments below executes.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (hint, ...) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample wraps the table expression.
            table_sample.set("this", this)
            this = table_sample

        return this
2332
2333    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2334        if not self._match(TokenType.UNNEST):
2335            return None
2336
2337        expressions = self._parse_wrapped_csv(self._parse_type)
2338        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2339        alias = self._parse_table_alias()
2340
2341        if alias and self.unnest_column_only:
2342            if alias.args.get("columns"):
2343                self.raise_error("Unexpected extra column alias in unnest.")
2344            alias.set("columns", [alias.this])
2345            alias.set("this", None)
2346
2347        offset = None
2348        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2349            self._match(TokenType.ALIAS)
2350            offset = self._parse_id_var() or exp.Identifier(this="offset")
2351
2352        return self.expression(
2353            exp.Unnest,
2354            expressions=expressions,
2355            ordinality=ordinality,
2356            alias=alias,
2357            offset=offset,
2358        )
2359
2360    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2361        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2362        if not is_derived and not self._match(TokenType.VALUES):
2363            return None
2364
2365        expressions = self._parse_csv(self._parse_value)
2366
2367        if is_derived:
2368            self._match_r_paren()
2369
2370        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2371
2372    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2373        if not self._match(TokenType.TABLE_SAMPLE) and not (
2374            as_modifier and self._match_text_seq("USING", "SAMPLE")
2375        ):
2376            return None
2377
2378        bucket_numerator = None
2379        bucket_denominator = None
2380        bucket_field = None
2381        percent = None
2382        rows = None
2383        size = None
2384        seed = None
2385
2386        kind = (
2387            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2388        )
2389        method = self._parse_var(tokens=(TokenType.ROW,))
2390
2391        self._match(TokenType.L_PAREN)
2392
2393        num = self._parse_number()
2394
2395        if self._match(TokenType.BUCKET):
2396            bucket_numerator = self._parse_number()
2397            self._match(TokenType.OUT_OF)
2398            bucket_denominator = bucket_denominator = self._parse_number()
2399            self._match(TokenType.ON)
2400            bucket_field = self._parse_field()
2401        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2402            percent = num
2403        elif self._match(TokenType.ROWS):
2404            rows = num
2405        else:
2406            size = num
2407
2408        self._match(TokenType.R_PAREN)
2409
2410        if self._match(TokenType.L_PAREN):
2411            method = self._parse_var()
2412            seed = self._match(TokenType.COMMA) and self._parse_number()
2413            self._match_r_paren()
2414        elif self._match_texts(("SEED", "REPEATABLE")):
2415            seed = self._parse_wrapped(self._parse_number)
2416
2417        return self.expression(
2418            exp.TableSample,
2419            method=method,
2420            bucket_numerator=bucket_numerator,
2421            bucket_denominator=bucket_denominator,
2422            bucket_field=bucket_field,
2423            percent=percent,
2424            rows=rows,
2425            size=size,
2426            seed=seed,
2427            kind=kind,
2428        )
2429
2430    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2431        return list(iter(self._parse_pivot, None))
2432
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause into an `exp.Pivot` node.

        Backtracks and returns None when the keyword is not followed by an
        opening paren (i.e. it was not actually a pivot clause).
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Keyword without "(" — not a pivot clause after all.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregation function calls, each optionally aliased.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only consume an alias if another PIVOT/UNPIVOT doesn't follow directly.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot produces: one per
            # (IN-value, aggregation alias) pair, order controlled by
            # PREFIXED_PIVOT_COLUMNS and IDENTIFY_PIVOT_STRINGS.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2492
2493    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2494        return [agg.alias for agg in aggregations]
2495
2496    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2497        if not skip_where_token and not self._match(TokenType.WHERE):
2498            return None
2499
2500        return self.expression(
2501            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2502        )
2503
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause into an `exp.Group` node.

        Loops so that plain expressions, GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS can be interleaved in any order. Returns None when
        GROUP BY is absent and not skipped.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # In the "WITH ROLLUP"/"WITH CUBE" form there is no explicit
            # column list, so the truthy `with_` flag is stored instead.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once no grouping modifier was consumed in this pass.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2540
2541    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2542        if not self._match(TokenType.GROUPING_SETS):
2543            return None
2544
2545        return self._parse_wrapped_csv(self._parse_grouping_set)
2546
2547    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2548        if self._match(TokenType.L_PAREN):
2549            grouping_set = self._parse_csv(self._parse_column)
2550            self._match_r_paren()
2551            return self.expression(exp.Tuple, expressions=grouping_set)
2552
2553        return self._parse_column()
2554
2555    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2556        if not skip_having_token and not self._match(TokenType.HAVING):
2557            return None
2558        return self.expression(exp.Having, this=self._parse_conjunction())
2559
2560    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2561        if not self._match(TokenType.QUALIFY):
2562            return None
2563        return self.expression(exp.Qualify, this=self._parse_conjunction())
2564
2565    def _parse_order(
2566        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2567    ) -> t.Optional[exp.Expression]:
2568        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2569            return this
2570
2571        return self.expression(
2572            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2573        )
2574
2575    def _parse_sort(
2576        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2577    ) -> t.Optional[exp.Expression]:
2578        if not self._match(token_type):
2579            return None
2580        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2581
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item: expr [ASC|DESC] [NULLS FIRST|NULLS LAST].

        When null ordering is not spelled out explicitly, `nulls_first` is
        derived from the dialect's `null_ordering` setting.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Default to NULLS FIRST when the dialect's implicit ordering would
        # sort nulls first for this direction ("nulls_are_small" with ASC, or
        # the opposite setting with DESC) and nulls aren't forced last.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2603
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True) or an ANSI FETCH clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may wrap its count in parens: TOP (n).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST|NEXT} n [PERCENT] {ROW|ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2642
2643    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2644        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2645            return this
2646
2647        count = self._parse_number()
2648        self._match_set((TokenType.ROW, TokenType.ROWS))
2649        return self.expression(exp.Offset, this=this, expression=count)
2650
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more row-locking clauses (FOR UPDATE, FOR SHARE,
        LOCK IN SHARE MODE), each with optional OF tables and wait options."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encoding: True = NOWAIT, expression = WAIT n,
            # False = SKIP LOCKED, None = unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2682
2683    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2684        if not self._match_set(self.SET_OPERATIONS):
2685            return this
2686
2687        token_type = self._prev.token_type
2688
2689        if token_type == TokenType.UNION:
2690            expression = exp.Union
2691        elif token_type == TokenType.EXCEPT:
2692            expression = exp.Except
2693        else:
2694            expression = exp.Intersect
2695
2696        return self.expression(
2697            expression,
2698            this=this,
2699            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2700            expression=self._parse_set_operations(self._parse_select(nested=True)),
2701        )
2702
2703    def _parse_expression(self) -> t.Optional[exp.Expression]:
2704        return self._parse_alias(self._parse_conjunction())
2705
2706    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2707        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2708
2709    def _parse_equality(self) -> t.Optional[exp.Expression]:
2710        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2711
2712    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2713        return self._parse_tokens(self._parse_range, self.COMPARISON)
2714
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates: the RANGE_PARSERS table (BETWEEN, IN,
        LIKE, ...), ISNULL/NOTNULL shorthands, and a trailing IS clause."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Re-apply a NOT consumed above (e.g. "x NOT BETWEEN a AND b").
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2741
2742    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2743        index = self._index - 1
2744        negate = self._match(TokenType.NOT)
2745        if self._match(TokenType.DISTINCT_FROM):
2746            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2747            return self.expression(klass, this=this, expression=self._parse_expression())
2748
2749        expression = self._parse_null() or self._parse_boolean()
2750        if not expression:
2751            self._retreat(index)
2752            return None
2753
2754        this = self.expression(exp.Is, this=this, expression=expression)
2755        return self.expression(exp.Not, this=this) if negate else this
2756
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of an IN predicate.

        Handles IN UNNEST(...), IN (subquery), IN (expr, ...), and the
        unparenthesized "IN field" form.
        """
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subqueryable means IN (SELECT ...); store it in "query".
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2774
2775    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2776        low = self._parse_bitwise()
2777        self._match(TokenType.AND)
2778        high = self._parse_bitwise()
2779        return self.expression(exp.Between, this=this, low=low, high=high)
2780
2781    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2782        if not self._match(TokenType.ESCAPE):
2783            return this
2784        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2785
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression into an `exp.Interval`, or None."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split "'5 day'" into value '5' plus unit var "day".
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2806
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators over terms, including the
        shifts << and >> which arrive as two adjacent LT/GT tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2829
2830    def _parse_term(self) -> t.Optional[exp.Expression]:
2831        return self._parse_tokens(self._parse_factor, self.TERM)
2832
2833    def _parse_factor(self) -> t.Optional[exp.Expression]:
2834        return self._parse_tokens(self._parse_unary, self.FACTOR)
2835
2836    def _parse_unary(self) -> t.Optional[exp.Expression]:
2837        if self._match_set(self.UNARY_PARSERS):
2838            return self.UNARY_PARSERS[self._prev.token_type](self)
2839        return self._parse_at_time_zone(self._parse_type())
2840
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like "TYPE 'literal'" form, a bare data
        type, or fall back to a plain column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # "TYPE 'literal'" becomes either a dialect-specific literal
                # (via TYPE_LITERAL_PARSERS) or a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name without arguments may really be a column
                # reference; re-parse from the saved position.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2862
2863    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2864        this = self._parse_type()
2865        if not this:
2866            return None
2867
2868        return self.expression(
2869            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2870        )
2871
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into an `exp.DataType` (or related) node.

        Args:
            check_func: when True, only keep the "TYPE(...)" interpretation if
                a string literal follows; otherwise backtrack, since the
                tokens more likely form a function call.

        Returns None (after backtracking) when no type can be parsed here.
        """
        index = self._index

        # Teradata allows types qualified by the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # "NAME(...)" could still be a function call instead of a type.
            maybe_func = True

        # "TYPE[]" (possibly repeated) builds nested ARRAY types.
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone "[" after the type name is not a type suffix; backtrack.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types: ARRAY<...>, STRUCT<...>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # No string after "NAME(...)" -> treat it as a function call.
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2984
2985    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2986        this = self._parse_type() or self._parse_id_var()
2987        self._match(TokenType.COLON)
2988        return self._parse_column_def(this)
2989
2990    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2991        if not self._match(TokenType.AT_TIME_ZONE):
2992            return this
2993        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2994
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column, consuming trailing column
        operators such as dots, ::-casts, and bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # "expr::type" cast syntax.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Other column operators take the next token as a literal key.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: the previous name parts become
                # table/db/catalog for the new rightmost field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3047
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with adjacent-string
        concatenation), a leading-dot number, or a parenthesized
        expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # ".5"-style numbers without a leading digit.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                # "(SELECT ...)" becomes a subquery, possibly followed by
                # set operations outside the parens.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3091
3092    def _parse_field(
3093        self,
3094        any_token: bool = False,
3095        tokens: t.Optional[t.Collection[TokenType]] = None,
3096    ) -> t.Optional[exp.Expression]:
3097        return (
3098            self._parse_primary()
3099            or self._parse_function()
3100            or self._parse_id_var(any_token=any_token, tokens=tokens)
3101        )
3102
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call expression.

        Args:
            functions: optional name -> builder overrides; defaults to
                self.FUNCTIONS.
            anonymous: when True, skip dialect-specific parsers/builders and
                produce a generic `exp.Anonymous` node.

        Returns None when the current tokens cannot start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        # Without a following "(", only no-paren functions can apply.
        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...): wrap the subquery in the predicate node.
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names become Anonymous nodes.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3154
3155    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3156        return self._parse_column_def(self._parse_id_var())
3157
3158    def _parse_user_defined_function(
3159        self, kind: t.Optional[TokenType] = None
3160    ) -> t.Optional[exp.Expression]:
3161        this = self._parse_id_var()
3162
3163        while self._match(TokenType.DOT):
3164            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3165
3166        if not self._match(TokenType.L_PAREN):
3167            return this
3168
3169        expressions = self._parse_csv(self._parse_function_parameter)
3170        self._match_r_paren()
3171        return self.expression(
3172            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3173        )
3174
3175    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3176        literal = self._parse_primary()
3177        if literal:
3178            return self.expression(exp.Introducer, this=token.text, expression=literal)
3179
3180        return self.expression(exp.Identifier, this=token.text)
3181
3182    def _parse_national(self, token: Token) -> exp.Expression:
3183        return self.expression(exp.National, this=exp.Literal.string(token.text))
3184
3185    def _parse_session_parameter(self) -> exp.Expression:
3186        kind = None
3187        this = self._parse_id_var() or self._parse_primary()
3188
3189        if this and self._match(TokenType.DOT):
3190            kind = this.name
3191            this = self._parse_var() or self._parse_primary()
3192
3193        return self.expression(exp.SessionParameter, this=this, kind=kind)
3194
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`), falling back to a DISTINCT
        list or a plain select/expression when no lambda operator follows."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator found; reparse from the start as a regular expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                # Treat the left side of `name = expr` as a variable, not a column
                if isinstance(left, exp.Column):
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3226
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional schema: a parenthesized list of constraints / column defs.

        Returns `this` unchanged when a nested SELECT follows (so the caller can
        parse it) or when no opening parenthesis is present.
        """
        index = self._index

        try:
            # Probe for a nested select; the finally below always rewinds, so this
            # only detects its presence without consuming it
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3247
3248    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3249        # column defs are not really columns, they're identifiers
3250        if isinstance(this, exp.Column):
3251            this = this.this
3252        kind = self._parse_types()
3253
3254        if self._match_text_seq("FOR", "ORDINALITY"):
3255            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3256
3257        constraints = []
3258        while True:
3259            constraint = self._parse_column_constraint()
3260            if not constraint:
3261                break
3262            constraints.append(constraint)
3263
3264        if not kind and not constraints:
3265            return this
3266
3267        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3268
3269    def _parse_auto_increment(self) -> exp.Expression:
3270        start = None
3271        increment = None
3272
3273        if self._match(TokenType.L_PAREN, advance=False):
3274            args = self._parse_wrapped_csv(self._parse_bitwise)
3275            start = seq_get(args, 0)
3276            increment = seq_get(args, 1)
3277        elif self._match_text_seq("START"):
3278            start = self._parse_bitwise()
3279            self._match_text_seq("INCREMENT")
3280            increment = self._parse_bitwise()
3281
3282        if start and increment:
3283            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3284
3285        return exp.AutoIncrementColumnConstraint()
3286
3287    def _parse_compress(self) -> exp.Expression:
3288        if self._match(TokenType.L_PAREN, advance=False):
3289            return self.expression(
3290                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3291            )
3292
3293        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3294
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY [(options)]."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            # this=False marks the BY DEFAULT variant
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            # this=True marks the GENERATED ALWAYS variant
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 1 MINVALUE 0 [NO] CYCLE)
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3324
3325    def _parse_inline(self) -> t.Optional[exp.Expression]:
3326        self._match_text_seq("LENGTH")
3327        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3328
3329    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3330        if self._match_text_seq("NULL"):
3331            return self.expression(exp.NotNullColumnConstraint)
3332        if self._match_text_seq("CASESPECIFIC"):
3333            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3334        return None
3335
3336    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3337        if self._match(TokenType.CONSTRAINT):
3338            this = self._parse_id_var()
3339        else:
3340            this = None
3341
3342        if self._match_texts(self.CONSTRAINT_PARSERS):
3343            return self.expression(
3344                exp.ColumnConstraint,
3345                this=this,
3346                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3347            )
3348
3349        return this
3350
3351    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3352        if not self._match(TokenType.CONSTRAINT):
3353            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3354
3355        this = self._parse_id_var()
3356        expressions = []
3357
3358        while True:
3359            constraint = self._parse_unnamed_constraint() or self._parse_function()
3360            if not constraint:
3361                break
3362            expressions.append(constraint)
3363
3364        return self.expression(exp.Constraint, this=this, expressions=expressions)
3365
3366    def _parse_unnamed_constraint(
3367        self, constraints: t.Optional[t.Collection[str]] = None
3368    ) -> t.Optional[exp.Expression]:
3369        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3370            return None
3371
3372        constraint = self._prev.text.upper()
3373        if constraint not in self.CONSTRAINT_PARSERS:
3374            self.raise_error(f"No parser found for schema constraint {constraint}.")
3375
3376        return self.CONSTRAINT_PARSERS[constraint](self)
3377
3378    def _parse_unique(self) -> exp.Expression:
3379        if not self._match(TokenType.L_PAREN, advance=False):
3380            return self.expression(exp.UniqueColumnConstraint)
3381        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3382
3383    def _parse_key_constraint_options(self) -> t.List[str]:
3384        options = []
3385        while True:
3386            if not self._curr:
3387                break
3388
3389            if self._match(TokenType.ON):
3390                action = None
3391                on = self._advance_any() and self._prev.text
3392
3393                if self._match(TokenType.NO_ACTION):
3394                    action = "NO ACTION"
3395                elif self._match(TokenType.CASCADE):
3396                    action = "CASCADE"
3397                elif self._match_pair(TokenType.SET, TokenType.NULL):
3398                    action = "SET NULL"
3399                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3400                    action = "SET DEFAULT"
3401                else:
3402                    self.raise_error("Invalid key constraint")
3403
3404                options.append(f"ON {on} {action}")
3405            elif self._match_text_seq("NOT", "ENFORCED"):
3406                options.append("NOT ENFORCED")
3407            elif self._match_text_seq("DEFERRABLE"):
3408                options.append("DEFERRABLE")
3409            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3410                options.append("INITIALLY DEFERRED")
3411            elif self._match_text_seq("NORELY"):
3412                options.append("NORELY")
3413            elif self._match_text_seq("MATCH", "FULL"):
3414                options.append("MATCH FULL")
3415            else:
3416                break
3417
3418        return options
3419
3420    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3421        if match and not self._match(TokenType.REFERENCES):
3422            return None
3423
3424        expressions = None
3425        this = self._parse_id_var()
3426
3427        if self._match(TokenType.L_PAREN, advance=False):
3428            expressions = self._parse_wrapped_id_vars()
3429
3430        options = self._parse_key_constraint_options()
3431        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3432
3433    def _parse_foreign_key(self) -> exp.Expression:
3434        expressions = self._parse_wrapped_id_vars()
3435        reference = self._parse_references()
3436        options = {}
3437
3438        while self._match(TokenType.ON):
3439            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3440                self.raise_error("Expected DELETE or UPDATE")
3441
3442            kind = self._prev.text.lower()
3443
3444            if self._match(TokenType.NO_ACTION):
3445                action = "NO ACTION"
3446            elif self._match(TokenType.SET):
3447                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3448                action = "SET " + self._prev.text.upper()
3449            else:
3450                self._advance()
3451                action = self._prev.text.upper()
3452
3453            options[kind] = action
3454
3455        return self.expression(
3456            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3457        )
3458
3459    def _parse_primary_key(self) -> exp.Expression:
3460        desc = (
3461            self._match_set((TokenType.ASC, TokenType.DESC))
3462            and self._prev.token_type == TokenType.DESC
3463        )
3464
3465        if not self._match(TokenType.L_PAREN, advance=False):
3466            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3467
3468        expressions = self._parse_wrapped_csv(self._parse_field)
3469        options = self._parse_key_constraint_options()
3470        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3471
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed syntax after `this`: subscripts (x[...]), ARRAY[...]
        literals, or {...} structs. Recurses to handle chains like x[0][1]."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon -> open-ended slice, e.g. x[:n]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indexes for the dialect's index offset
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3500
3501    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3502        if self._match(TokenType.COLON):
3503            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3504        return this
3505
3506    def _parse_case(self) -> t.Optional[exp.Expression]:
3507        ifs = []
3508        default = None
3509
3510        expression = self._parse_conjunction()
3511
3512        while self._match(TokenType.WHEN):
3513            this = self._parse_conjunction()
3514            self._match(TokenType.THEN)
3515            then = self._parse_conjunction()
3516            ifs.append(self.expression(exp.If, this=this, true=then))
3517
3518        if self._match(TokenType.ELSE):
3519            default = self._parse_conjunction()
3520
3521        if not self._match(TokenType.END):
3522            self.raise_error("Expected END after CASE", self._prev)
3523
3524        return self._parse_window(
3525            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3526        )
3527
3528    def _parse_if(self) -> t.Optional[exp.Expression]:
3529        if self._match(TokenType.L_PAREN):
3530            args = self._parse_csv(self._parse_conjunction)
3531            this = exp.If.from_arg_list(args)
3532            self.validate_expression(this, args)
3533            self._match_r_paren()
3534        else:
3535            index = self._index - 1
3536            condition = self._parse_conjunction()
3537
3538            if not condition:
3539                self._retreat(index)
3540                return None
3541
3542            self._match(TokenType.THEN)
3543            true = self._parse_conjunction()
3544            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3545            self._match(TokenType.END)
3546            this = self.expression(exp.If, this=condition, true=true, false=false)
3547
3548        return self._parse_window(this)
3549
3550    def _parse_extract(self) -> exp.Expression:
3551        this = self._parse_function() or self._parse_var() or self._parse_type()
3552
3553        if self._match(TokenType.FROM):
3554            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3555
3556        if not self._match(TokenType.COMMA):
3557            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3558
3559        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3560
3561    def _parse_cast(self, strict: bool) -> exp.Expression:
3562        this = self._parse_conjunction()
3563
3564        if not self._match(TokenType.ALIAS):
3565            if self._match(TokenType.COMMA):
3566                return self.expression(
3567                    exp.CastToStrType, this=this, expression=self._parse_string()
3568                )
3569            else:
3570                self.raise_error("Expected AS after CAST")
3571
3572        to = self._parse_types()
3573
3574        if not to:
3575            self.raise_error("Expected TYPE after CAST")
3576        elif to.this == exp.DataType.Type.CHAR:
3577            if self._match(TokenType.CHARACTER_SET):
3578                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3579
3580        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3581
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into exp.GroupConcat.

        Handles the Postgres form with ORDER BY inside the argument list as well
        as the standard WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3610
3611    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3612        to: t.Optional[exp.Expression]
3613        this = self._parse_bitwise()
3614
3615        if self._match(TokenType.USING):
3616            to = self.expression(exp.CharacterSet, this=self._parse_var())
3617        elif self._match(TokenType.COMMA):
3618            to = self._parse_bitwise()
3619        else:
3620            to = None
3621
3622        # Swap the argument order if needed to produce the correct AST
3623        if self.CONVERT_TYPE_FIRST:
3624            this, to = to, this
3625
3626        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3627
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than three args -> the (bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg becomes the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never compares equal, so use IS NULL explicitly
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3674
3675    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3676        self._match_text_seq("KEY")
3677        key = self._parse_field()
3678        self._match(TokenType.COLON)
3679        self._match_text_seq("VALUE")
3680        value = self._parse_field()
3681        if not key and not value:
3682            return None
3683        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3684
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus the optional clauses
        {NULL|ABSENT} ON NULL, {WITH|WITHOUT} UNIQUE KEYS, RETURNING <type>,
        FORMAT JSON and ENCODING <var>."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Optional trailing KEYS keyword of [WITH|WITHOUT] UNIQUE KEYS
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3715
3716    def _parse_logarithm(self) -> exp.Expression:
3717        # Default argument order is base, expression
3718        args = self._parse_csv(self._parse_range)
3719
3720        if len(args) > 1:
3721            if not self.LOG_BASE_FIRST:
3722                args.reverse()
3723            return exp.Log.from_arg_list(args)
3724
3725        return self.expression(
3726            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3727        )
3728
3729    def _parse_match_against(self) -> exp.Expression:
3730        expressions = self._parse_csv(self._parse_column)
3731
3732        self._match_text_seq(")", "AGAINST", "(")
3733
3734        this = self._parse_string()
3735
3736        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3737            modifier = "IN NATURAL LANGUAGE MODE"
3738            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3739                modifier = f"{modifier} WITH QUERY EXPANSION"
3740        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3741            modifier = "IN BOOLEAN MODE"
3742        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3743            modifier = "WITH QUERY EXPANSION"
3744        else:
3745            modifier = None
3746
3747        return self.expression(
3748            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3749        )
3750
3751    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
3752    def _parse_open_json(self) -> exp.Expression:
3753        this = self._parse_bitwise()
3754        path = self._match(TokenType.COMMA) and self._parse_string()
3755
3756        def _parse_open_json_column_def() -> exp.Expression:
3757            this = self._parse_field(any_token=True)
3758            kind = self._parse_types()
3759            path = self._parse_string()
3760            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
3761            return self.expression(
3762                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
3763            )
3764
3765        expressions = None
3766        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
3767            self._match_l_paren()
3768            expressions = self._parse_csv(_parse_open_json_column_def)
3769
3770        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3771
3772    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3773        args = self._parse_csv(self._parse_bitwise)
3774
3775        if self._match(TokenType.IN):
3776            return self.expression(
3777                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3778            )
3779
3780        if haystack_first:
3781            haystack = seq_get(args, 0)
3782            needle = seq_get(args, 1)
3783        else:
3784            needle = seq_get(args, 0)
3785            haystack = seq_get(args, 1)
3786
3787        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3788
3789        self.validate_expression(this, args)
3790
3791        return this
3792
3793    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3794        args = self._parse_csv(self._parse_table)
3795        return exp.JoinHint(this=func_name.upper(), expressions=args)
3796
3797    def _parse_substring(self) -> exp.Expression:
3798        # Postgres supports the form: substring(string [from int] [for int])
3799        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3800
3801        args = self._parse_csv(self._parse_bitwise)
3802
3803        if self._match(TokenType.FROM):
3804            args.append(self._parse_bitwise())
3805            if self._match(TokenType.FOR):
3806                args.append(self._parse_bitwise())
3807
3808        this = exp.Substring.from_arg_list(args)
3809        self.validate_expression(this, args)
3810
3811        return this
3812
3813    def _parse_trim(self) -> exp.Expression:
3814        # https://www.w3resource.com/sql/character-functions/trim.php
3815        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3816
3817        position = None
3818        collation = None
3819
3820        if self._match_set(self.TRIM_TYPES):
3821            position = self._prev.text.upper()
3822
3823        expression = self._parse_bitwise()
3824        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3825            this = self._parse_bitwise()
3826        else:
3827            this = expression
3828            expression = None
3829
3830        if self._match(TokenType.COLLATE):
3831            collation = self._parse_bitwise()
3832
3833        return self.expression(
3834            exp.Trim,
3835            this=this,
3836            position=position,
3837            expression=expression,
3838            collation=collation,
3839        )
3840
3841    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3842        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3843
3844    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3845        return self._parse_window(self._parse_id_var(), alias=True)
3846
3847    def _parse_respect_or_ignore_nulls(
3848        self, this: t.Optional[exp.Expression]
3849    ) -> t.Optional[exp.Expression]:
3850        if self._match(TokenType.IGNORE_NULLS):
3851            return self.expression(exp.IgnoreNulls, this=this)
3852        if self._match(TokenType.RESPECT_NULLS):
3853            return self.expression(exp.RespectNulls, this=this)
3854        return this
3855
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax around `this`: FILTER (...), WITHIN GROUP,
        NULLS handling, and an OVER (...) clause or named window definition.

        Args:
            this: the expression (typically a function call) being windowed.
            alias: when True, parse a named window definition (`name AS (...)`)
                instead of expecting OVER.

        Returns:
            `this`, possibly wrapped in Filter/WithinGroup/Window nodes.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (referencing a named window)
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FROM FIRST / FROM LAST qualifier (e.g. for NTH_VALUE)
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS/RANGE [BETWEEN] <start> AND <end>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3938
3939    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3940        self._match(TokenType.BETWEEN)
3941
3942        return {
3943            "value": (
3944                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3945            )
3946            or self._parse_bitwise(),
3947            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3948        }
3949
3950    def _parse_alias(
3951        self, this: t.Optional[exp.Expression], explicit: bool = False
3952    ) -> t.Optional[exp.Expression]:
3953        any_token = self._match(TokenType.ALIAS)
3954
3955        if explicit and not any_token:
3956            return this
3957
3958        if self._match(TokenType.L_PAREN):
3959            aliases = self.expression(
3960                exp.Aliases,
3961                this=this,
3962                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3963            )
3964            self._match_r_paren(aliases)
3965            return aliases
3966
3967        alias = self._parse_id_var(any_token)
3968
3969        if alias:
3970            return self.expression(exp.Alias, this=this, alias=alias)
3971
3972        return this
3973
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name.

        Tries a quoted identifier first; otherwise consumes any matching
        ``prefix_tokens`` and then either any non-reserved token
        (``any_token=True``) or one of the allowed ``tokens`` (defaulting to
        ``ID_VAR_TOKENS``).
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        # Glue the text of any matched prefix tokens onto the front of the name.
        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # A string token used in name position becomes a quoted identifier.
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None
3996
3997    def _parse_string(self) -> t.Optional[exp.Expression]:
3998        if self._match(TokenType.STRING):
3999            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4000        return self._parse_placeholder()
4001
4002    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4003        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4004
4005    def _parse_number(self) -> t.Optional[exp.Expression]:
4006        if self._match(TokenType.NUMBER):
4007            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4008        return self._parse_placeholder()
4009
4010    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4011        if self._match(TokenType.IDENTIFIER):
4012            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4013        return self._parse_placeholder()
4014
4015    def _parse_var(
4016        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4017    ) -> t.Optional[exp.Expression]:
4018        if (
4019            (any_token and self._advance_any())
4020            or self._match(TokenType.VAR)
4021            or (self._match_set(tokens) if tokens else False)
4022        ):
4023            return self.expression(exp.Var, this=self._prev.text)
4024        return self._parse_placeholder()
4025
4026    def _advance_any(self) -> t.Optional[Token]:
4027        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4028            self._advance()
4029            return self._prev
4030        return None
4031
4032    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4033        return self._parse_var() or self._parse_string()
4034
4035    def _parse_null(self) -> t.Optional[exp.Expression]:
4036        if self._match(TokenType.NULL):
4037            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4038        return None
4039
4040    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4041        if self._match(TokenType.TRUE):
4042            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4043        if self._match(TokenType.FALSE):
4044            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4045        return None
4046
4047    def _parse_star(self) -> t.Optional[exp.Expression]:
4048        if self._match(TokenType.STAR):
4049            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4050        return None
4051
4052    def _parse_parameter(self) -> exp.Expression:
4053        wrapped = self._match(TokenType.L_BRACE)
4054        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4055        self._match(TokenType.R_BRACE)
4056        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4057
    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. ``?``, ``:name``) via ``PLACEHOLDER_PARSERS``.

        If the matched token's sub-parser declines (returns ``None``), the
        token is put back so other rules can try it.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Backtrack: the token did not actually start a placeholder.
            self._advance(-1)
        return None
4065
4066    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4067        if not self._match(TokenType.EXCEPT):
4068            return None
4069        if self._match(TokenType.L_PAREN, advance=False):
4070            return self._parse_wrapped_csv(self._parse_column)
4071        return self._parse_csv(self._parse_column)
4072
4073    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4074        if not self._match(TokenType.REPLACE):
4075            return None
4076        if self._match(TokenType.L_PAREN, advance=False):
4077            return self._parse_wrapped_csv(self._parse_expression)
4078        return self._parse_csv(self._parse_expression)
4079
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a ``sep``-delimited list of items using ``parse_method``.

        ``None`` results are dropped; comments attached to each separator
        token are moved onto the item that precedes it.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's comments to the previous item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
4093
4094    def _parse_tokens(
4095        self, parse_method: t.Callable, expressions: t.Dict
4096    ) -> t.Optional[exp.Expression]:
4097        this = parse_method()
4098
4099        while self._match_set(expressions):
4100            this = self.expression(
4101                expressions[self._prev.token_type],
4102                this=this,
4103                comments=self._prev_comments,
4104                expression=parse_method(),
4105            )
4106
4107        return this
4108
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4111
4112    def _parse_wrapped_csv(
4113        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4114    ) -> t.List[t.Optional[exp.Expression]]:
4115        return self._parse_wrapped(
4116            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4117        )
4118
4119    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4120        wrapped = self._match(TokenType.L_PAREN)
4121        if not wrapped and not optional:
4122            self.raise_error("Expecting (")
4123        parse_result = parse_method()
4124        if wrapped:
4125            self._match_r_paren()
4126        return parse_result
4127
4128    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4129        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4130
4131    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4132        return self._parse_set_operations(
4133            self._parse_select(nested=True, parse_subquery_alias=False)
4134        )
4135
4136    def _parse_transaction(self) -> exp.Expression:
4137        this = None
4138        if self._match_texts(self.TRANSACTION_KIND):
4139            this = self._prev.text
4140
4141        self._match_texts({"TRANSACTION", "WORK"})
4142
4143        modes = []
4144        while True:
4145            mode = []
4146            while self._match(TokenType.VAR):
4147                mode.append(self._prev.text)
4148
4149            if mode:
4150                modes.append(" ".join(mode))
4151            if not self._match(TokenType.COMMA):
4152                break
4153
4154        return self.expression(exp.Transaction, this=this, modes=modes)
4155
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse COMMIT / ROLLBACK, including TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> True, AND NO CHAIN -> False
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
4174
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] action, with IF NOT EXISTS
        and FIRST/AFTER column positioning."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4195
4196    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4197        drop = self._match(TokenType.DROP) and self._parse_drop()
4198        if drop and not isinstance(drop, exp.Command):
4199            drop.set("kind", drop.args.get("kind", "COLUMN"))
4200        return drop
4201
4202    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4203    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4204        return self.expression(
4205            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4206        )
4207
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The introducing keyword was already consumed by the caller; its token
        type is read from ``self._prev``.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # FOREIGN KEY / PRIMARY KEY may appear bare or after CONSTRAINT <name>.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4231
4232    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4233        index = self._index - 1
4234
4235        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4236            return self._parse_csv(self._parse_add_constraint)
4237
4238        self._retreat(index)
4239        return self._parse_csv(self._parse_add_column)
4240
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <change>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <type> [COLLATE <x>] [USING <expr>]
        self._match_text_seq("SET", "DATA")
        # NOTE: the keyword arguments below consume tokens as they are
        # evaluated, so their left-to-right order matters.
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4258
4259    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4260        index = self._index - 1
4261
4262        partition_exists = self._parse_exists()
4263        if self._match(TokenType.PARTITION, advance=False):
4264            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4265
4266        self._retreat(index)
4267        return self._parse_csv(self._parse_drop_column)
4268
4269    def _parse_alter_table_rename(self) -> exp.Expression:
4270        self._match_text_seq("TO")
4271        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4272
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Unsupported variants (non-TABLE targets, unknown actions, or leftover
        tokens after the action) fall back to an opaque ``exp.Command``.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The action keyword (ADD, DROP, ALTER, RENAME, ...) selects a parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse when every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4297
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... WHEN ... statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] THEN ...
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * form
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (vals) form
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * form
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET a = b, ... form
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4363
4364    def _parse_show(self) -> t.Optional[exp.Expression]:
4365        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4366        if parser:
4367            return parser(self)
4368        self._advance()
4369        return self.expression(exp.Show, this=self._prev.text.upper())
4370
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a SET assignment like ``name = value`` or ``name TO value``.

        Returns ``None`` (with the position restored) if no assignment
        operator follows the left-hand side.
        """
        index = self._index

        # SET GLOBAL/SESSION TRANSACTION is handled by a dedicated parser.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4397
4398    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4399        self._match_text_seq("TRANSACTION")
4400        characteristics = self._parse_csv(
4401            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4402        )
4403        return self.expression(
4404            exp.SetItem,
4405            expressions=characteristics,
4406            kind="TRANSACTION",
4407            **{"global": global_},  # type: ignore
4408        )
4409
4410    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4411        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4412        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4413
    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement, falling back to a raw command on leftovers."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        # If tokens remain, the SET could not be fully parsed -- treat the
        # whole statement as an opaque command instead.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
4423
4424    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4425        for option in options:
4426            if self._match_text_seq(*option.split(" ")):
4427                return exp.Var(this=option)
4428        return None
4429
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as a raw Command.

        The command name is the text of ``start``; everything after it becomes
        the unparsed expression payload.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
4436
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk ``trie`` over upcoming tokens to find a multi-word parser key.

        Advances past the matched words on success; restores the original
        position and returns ``None`` on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No key with this prefix exists -- give up.
                break
            if result == 2:
                # A full key matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4459
4460    def _match(self, token_type, advance=True, expression=None):
4461        if not self._curr:
4462            return None
4463
4464        if self._curr.token_type == token_type:
4465            if advance:
4466                self._advance()
4467            self._add_comments(expression)
4468            return True
4469
4470        return None
4471
4472    def _match_set(self, types, advance=True):
4473        if not self._curr:
4474            return None
4475
4476        if self._curr.token_type in types:
4477            if advance:
4478                self._advance()
4479            return True
4480
4481        return None
4482
4483    def _match_pair(self, token_type_a, token_type_b, advance=True):
4484        if not self._curr or not self._next:
4485            return None
4486
4487        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4488            if advance:
4489                self._advance(2)
4490            return True
4491
4492        return None
4493
4494    def _match_l_paren(self, expression=None):
4495        if not self._match(TokenType.L_PAREN, expression=expression):
4496            self.raise_error("Expecting (")
4497
4498    def _match_r_paren(self, expression=None):
4499        if not self._match(TokenType.R_PAREN, expression=expression):
4500            self.raise_error("Expecting )")
4501
4502    def _match_texts(self, texts, advance=True):
4503        if self._curr and self._curr.text.upper() in texts:
4504            if advance:
4505                self._advance()
4506            return True
4507        return False
4508
    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts, backtracking on failure.

        With ``advance=False`` the position is restored even on success, so
        this becomes a pure lookahead.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                # Partial matches must not consume any tokens.
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4522
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where column-like syntax actually denotes dotted names rather
        than real columns.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.name; a bare one becomes a Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4537
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite columns in ``node`` whose root name is in ``lambda_variables``.

        Such columns are replaced by a plain identifier (or Dot chain),
        detaching them from normal column resolution.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost Dot that ends at this column, if any;
                # otherwise (loop not broken) replace the column itself.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: Sequence) -> sqlglot.expressions.Expression:
19def parse_var_map(args: t.Sequence) -> exp.Expression:
20    if len(args) == 1 and args[0].is_star:
21        return exp.StarMap(this=args[0])
22
23    keys = []
24    values = []
25    for i in range(0, len(args), 2):
26        keys.append(args[i])
27        values.append(args[i + 1])
28    return exp.VarMap(
29        keys=exp.Array(expressions=keys),
30        values=exp.Array(expressions=values),
31    )
def parse_like(args):
34def parse_like(args):
35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
39def binary_range_parser(
40    expr_type: t.Type[exp.Expression],
41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
42    return lambda self, this: self._parse_escape(
43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
44    )
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # SQL function name -> callable that builds the corresponding expression
    # node from an argument list.  Generic entries come from exp.ALL_FUNCTIONS;
    # the explicit entries below add or override dialect-neutral behavior.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Type tokens that may contain nested types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    # All tokens that can begin a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Tokens that introduce a subquery predicate, e.g. EXISTS (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds creatable at the database level vs. overall.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that may be used as identifiers / variable names even though they
    # are keywords in some dialects.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens allowed as table aliases (removes tokens that would be
    # ambiguous after a table reference, e.g. LEFT/RIGHT join keywords).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may appear as function names when followed by parentheses.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # --- operator precedence tables: token -> expression class ---

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `->` builds an exp.Lambda whose body has its parameter
    # references rewritten; `=>` builds a keyword argument (exp.Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that can follow a column expression (casts, JSON
    # extraction arrows, etc.).  DOT is handled specially, hence None.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps expression classes to the parser method used by `parse_into`.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Top-level statement dispatch: leading token -> parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression dispatch; each parser receives the token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range predicates (BETWEEN, IN, LIKE, ...) keyed by their leading token.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser (CREATE TABLE ... WITH/OPTIONS clauses).
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column constraint keyword -> parser (CREATE TABLE column definitions).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs that take no parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need bespoke parsing (CAST, EXTRACT, ...).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # SELECT-statement modifier clauses, parsed in this declaration order.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks: empty here, populated by subclasses.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # --- dialect behavior flags, overridden by subclasses ---

    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Restrict instances to these attributes (saves memory; parsers are
    # created frequently).
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Args:
            error_level: how errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: how many characters of SQL to show on either side
                of the offending token in error messages (see `raise_error`).
            index_offset: dialect-specific index offset setting (consumed elsewhere).
            unnest_column_only: dialect-specific UNNEST flag (consumed elsewhere).
            alias_post_tablesample: dialect-specific aliasing flag (consumed elsewhere).
            max_errors: maximum number of error messages concatenated when raising.
            null_ordering: dialect-specific NULL ordering setting (consumed elsewhere).
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Initialize all per-parse state (sql, errors, token cursor).
        self.reset()
 829
 830    def reset(self):
 831        self.sql = ""
 832        self.errors = []
 833        self._tokens = []
 834        self._index = 0
 835        self._curr = None
 836        self._next = None
 837        self._prev = None
 838        self._prev_comments = None
 839
 840    def parse(
 841        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 842    ) -> t.List[t.Optional[exp.Expression]]:
 843        """
 844        Parses a list of tokens and returns a list of syntax trees, one tree
 845        per parsed SQL statement.
 846
 847        Args:
 848            raw_tokens: the list of tokens.
 849            sql: the original SQL string, used to produce helpful debug messages.
 850
 851        Returns:
 852            The list of syntax trees.
 853        """
 854        return self._parse(
 855            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 856        )
 857
 858    def parse_into(
 859        self,
 860        expression_types: exp.IntoType,
 861        raw_tokens: t.List[Token],
 862        sql: t.Optional[str] = None,
 863    ) -> t.List[t.Optional[exp.Expression]]:
 864        """
 865        Parses a list of tokens into a given Expression type. If a collection of Expression
 866        types is given instead, this method will try to parse the token list into each one
 867        of them, stopping at the first for which the parsing succeeds.
 868
 869        Args:
 870            expression_types: the expression type(s) to try and parse the token list into.
 871            raw_tokens: the list of tokens.
 872            sql: the original SQL string, used to produce helpful debug messages.
 873
 874        Returns:
 875            The target Expression.
 876        """
 877        errors = []
 878        for expression_type in ensure_collection(expression_types):
 879            parser = self.EXPRESSION_PARSERS.get(expression_type)
 880            if not parser:
 881                raise TypeError(f"No parser registered for {expression_type}")
 882            try:
 883                return self._parse(parser, raw_tokens, sql)
 884            except ParseError as e:
 885                e.errors[0]["into_expression"] = expression_type
 886                errors.append(e)
 887        raise ParseError(
 888            f"Failed to parse into {expression_types}",
 889            errors=merge_errors(errors),
 890        ) from errors[-1]
 891
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Splits `raw_tokens` into per-statement chunks at semicolons and applies
        `parse_method` to each chunk, returning one (possibly None) tree per statement.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Start a new chunk, unless this is a trailing semicolon.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position the cursor on the first token

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 925
 926    def check_errors(self) -> None:
 927        """
 928        Logs or raises any found errors, depending on the chosen error level setting.
 929        """
 930        if self.error_level == ErrorLevel.WARN:
 931            for error in self.errors:
 932                logger.error(str(error))
 933        elif self.error_level == ErrorLevel.RAISE and self.errors:
 934            raise ParseError(
 935                concat_messages(self.errors, self.max_errors),
 936                errors=merge_errors(self.errors),
 937            )
 938
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current/previous token (or an empty dummy) so the error
        # always has a position to report.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            # The escape codes underline the offending SQL fragment in terminals.
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 966
 967    def expression(
 968        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 969    ) -> E:
 970        """
 971        Creates a new, validated Expression.
 972
 973        Args:
 974            exp_class: the expression class to instantiate.
 975            comments: an optional list of comments to attach to the expression.
 976            kwargs: the arguments to set for the expression along with their respective values.
 977
 978        Returns:
 979            The target expression.
 980        """
 981        instance = exp_class(**kwargs)
 982        instance.add_comments(comments) if comments else self._add_comments(instance)
 983        self.validate_expression(instance)
 984        return instance
 985
 986    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 987        if expression and self._prev_comments:
 988            expression.add_comments(self._prev_comments)
 989            self._prev_comments = None
 990
 991    def validate_expression(
 992        self, expression: exp.Expression, args: t.Optional[t.List] = None
 993    ) -> None:
 994        """
 995        Validates an already instantiated expression, making sure that all its mandatory arguments
 996        are set.
 997
 998        Args:
 999            expression: the expression to validate.
1000            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1001        """
1002        if self.error_level == ErrorLevel.IGNORE:
1003            return
1004
1005        for error_message in expression.error_messages(args):
1006            self.raise_error(error_message)
1007
1008    def _find_sql(self, start: Token, end: Token) -> str:
1009        return self.sql[start.start : end.end + 1]
1010
    def _advance(self, times: int = 1) -> None:
        """Move the token cursor forward `times` positions, refreshing the
        current/lookahead/previous token references."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            # At the start of the stream there is no previous token.
            self._prev = None
            self._prev_comments = None
1021
1022    def _retreat(self, index: int) -> None:
1023        if index != self._index:
1024            self._advance(index - self._index)
1025
1026    def _parse_command(self) -> exp.Command:
1027        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1028
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """
        Parse a COMMENT [IF EXISTS] ON <kind> <object> IS <string> statement, falling
        back to a generic command when the object kind is not a known creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        # The target is parsed differently depending on the object kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1054
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (see URL above)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression, optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # An optional SET of aggregations may follow a GROUP BY.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1092
1093    def _parse_statement(self) -> t.Optional[exp.Expression]:
1094        if self._curr is None:
1095            return None
1096
1097        if self._match_set(self.STATEMENT_PARSERS):
1098            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1099
1100        if self._match_set(Tokenizer.COMMANDS):
1101            return self._parse_command()
1102
1103        expression = self._parse_expression()
1104        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1105        return self._parse_query_modifiers(expression)
1106
    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP statement; unknown object kinds degrade to a generic command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        # NOTE: keyword-argument order matters here — each value consumes tokens in turn.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
1126
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; truthy only when the full sequence matched.
        The short-circuit chain is load-bearing: each _match consumes tokens."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1133
1134    def _parse_create(self) -> t.Optional[exp.Expression]:
1135        start = self._prev
1136        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1137            TokenType.OR, TokenType.REPLACE
1138        )
1139        unique = self._match(TokenType.UNIQUE)
1140
1141        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1142            self._match(TokenType.TABLE)
1143
1144        properties = None
1145        create_token = self._match_set(self.CREATABLES) and self._prev
1146
1147        if not create_token:
1148            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1149            create_token = self._match_set(self.CREATABLES) and self._prev
1150
1151            if not properties or not create_token:
1152                return self._parse_as_command(start)
1153
1154        exists = self._parse_exists(not_=True)
1155        this = None
1156        expression = None
1157        indexes = None
1158        no_schema_binding = None
1159        begin = None
1160        clone = None
1161
1162        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1163            this = self._parse_user_defined_function(kind=create_token.token_type)
1164            temp_properties = self._parse_properties()
1165            if properties and temp_properties:
1166                properties.expressions.extend(temp_properties.expressions)
1167            elif temp_properties:
1168                properties = temp_properties
1169
1170            self._match(TokenType.ALIAS)
1171            begin = self._match(TokenType.BEGIN)
1172            return_ = self._match_text_seq("RETURN")
1173            expression = self._parse_statement()
1174
1175            if return_:
1176                expression = self.expression(exp.Return, this=expression)
1177        elif create_token.token_type == TokenType.INDEX:
1178            this = self._parse_index()
1179        elif create_token.token_type in self.DB_CREATABLES:
1180            table_parts = self._parse_table_parts(schema=True)
1181
1182            # exp.Properties.Location.POST_NAME
1183            if self._match(TokenType.COMMA):
1184                temp_properties = self._parse_properties(before=True)
1185                if properties and temp_properties:
1186                    properties.expressions.extend(temp_properties.expressions)
1187                elif temp_properties:
1188                    properties = temp_properties
1189
1190            this = self._parse_schema(this=table_parts)
1191
1192            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1193            temp_properties = self._parse_properties()
1194            if properties and temp_properties:
1195                properties.expressions.extend(temp_properties.expressions)
1196            elif temp_properties:
1197                properties = temp_properties
1198
1199            self._match(TokenType.ALIAS)
1200
1201            # exp.Properties.Location.POST_ALIAS
1202            if not (
1203                self._match(TokenType.SELECT, advance=False)
1204                or self._match(TokenType.WITH, advance=False)
1205                or self._match(TokenType.L_PAREN, advance=False)
1206            ):
1207                temp_properties = self._parse_properties()
1208                if properties and temp_properties:
1209                    properties.expressions.extend(temp_properties.expressions)
1210                elif temp_properties:
1211                    properties = temp_properties
1212
1213            expression = self._parse_ddl_select()
1214
1215            if create_token.token_type == TokenType.TABLE:
1216                indexes = []
1217                while True:
1218                    index = self._parse_create_table_index()
1219
1220                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1221                    temp_properties = self._parse_properties()
1222                    if properties and temp_properties:
1223                        properties.expressions.extend(temp_properties.expressions)
1224                    elif temp_properties:
1225                        properties = temp_properties
1226
1227                    if not index:
1228                        break
1229                    else:
1230                        self._match(TokenType.COMMA)
1231                        indexes.append(index)
1232            elif create_token.token_type == TokenType.VIEW:
1233                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1234                    no_schema_binding = True
1235
1236            if self._match_text_seq("CLONE"):
1237                clone = self._parse_table(schema=True)
1238                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1239                clone_kind = (
1240                    self._match(TokenType.L_PAREN)
1241                    and self._match_texts(self.CLONE_KINDS)
1242                    and self._prev.text.upper()
1243                )
1244                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1245                self._match(TokenType.R_PAREN)
1246                clone = self.expression(
1247                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1248                )
1249
1250        return self.expression(
1251            exp.Create,
1252            this=this,
1253            kind=create_token.text,
1254            replace=replace,
1255            unique=unique,
1256            expression=expression,
1257            exists=exists,
1258            properties=properties,
1259            indexes=indexes,
1260            no_schema_binding=no_schema_binding,
1261            begin=begin,
1262            clone=clone,
1263        )
1264
1265    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1266        self._match(TokenType.COMMA)
1267
1268        # parsers look to _prev for no/dual/default, so need to consume first
1269        self._match_text_seq("NO")
1270        self._match_text_seq("DUAL")
1271        self._match_text_seq("DEFAULT")
1272
1273        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1274            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1275
1276        return None
1277
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single property: registered keyword parsers first, then special
        two-token forms, then a generic `key = value` assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` where key is an identifier or a string literal.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1301
1302    def _parse_stored(self) -> exp.Expression:
1303        self._match(TokenType.ALIAS)
1304
1305        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1306        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1307
1308        return self.expression(
1309            exp.FileFormatProperty,
1310            this=self.expression(
1311                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1312            )
1313            if input_format or output_format
1314            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1315        )
1316
1317    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1318        self._match(TokenType.EQ)
1319        self._match(TokenType.ALIAS)
1320        return self.expression(exp_class, this=self._parse_field())
1321
1322    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1323        properties = []
1324
1325        while True:
1326            if before:
1327                identified_property = self._parse_property_before()
1328            else:
1329                identified_property = self._parse_property()
1330
1331            if not identified_property:
1332                break
1333            for p in ensure_list(identified_property):
1334                properties.append(p)
1335
1336        if properties:
1337            return self.expression(exp.Properties, expressions=properties)
1338
1339        return None
1340
1341    def _parse_fallback(self, no=False) -> exp.Expression:
1342        self._match_text_seq("FALLBACK")
1343        return self.expression(
1344            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1345        )
1346
1347    def _parse_volatile_property(self) -> exp.Expression:
1348        if self._index >= 2:
1349            pre_volatile_token = self._tokens[self._index - 2]
1350        else:
1351            pre_volatile_token = None
1352
1353        if pre_volatile_token and pre_volatile_token.token_type in (
1354            TokenType.CREATE,
1355            TokenType.REPLACE,
1356            TokenType.UNIQUE,
1357        ):
1358            return exp.VolatileProperty()
1359
1360        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1361
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property (or parenthesized property list) following WITH.
        The order of the keyword checks below is significant."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop, prop, ...) — a parenthesized property list.
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
1381
1382    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1383    def _parse_definer(self) -> t.Optional[exp.Expression]:
1384        self._match(TokenType.EQ)
1385
1386        user = self._parse_id_var()
1387        self._match(TokenType.PARAMETER)
1388        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1389
1390        if not user or not host:
1391            return None
1392
1393        return exp.DefinerProperty(this=f"{user}@{host}")
1394
1395    def _parse_withjournaltable(self) -> exp.Expression:
1396        self._match(TokenType.TABLE)
1397        self._match(TokenType.EQ)
1398        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1399
1400    def _parse_log(self, no=False) -> exp.Expression:
1401        self._match_text_seq("LOG")
1402        return self.expression(exp.LogProperty, no=no)
1403
1404    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1405        before = self._match_text_seq("BEFORE")
1406        self._match_text_seq("JOURNAL")
1407        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1408
1409    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1410        self._match_text_seq("NOT")
1411        self._match_text_seq("LOCAL")
1412        self._match_text_seq("AFTER", "JOURNAL")
1413        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1414
1415    def _parse_checksum(self) -> exp.Expression:
1416        self._match_text_seq("CHECKSUM")
1417        self._match(TokenType.EQ)
1418
1419        on = None
1420        if self._match(TokenType.ON):
1421            on = True
1422        elif self._match_text_seq("OFF"):
1423            on = False
1424        default = self._match(TokenType.DEFAULT)
1425
1426        return self.expression(
1427            exp.ChecksumProperty,
1428            on=on,
1429            default=default,
1430        )
1431
1432    def _parse_freespace(self) -> exp.Expression:
1433        self._match_text_seq("FREESPACE")
1434        self._match(TokenType.EQ)
1435        return self.expression(
1436            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1437        )
1438
1439    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1440        self._match_text_seq("MERGEBLOCKRATIO")
1441        if self._match(TokenType.EQ):
1442            return self.expression(
1443                exp.MergeBlockRatioProperty,
1444                this=self._parse_number(),
1445                percent=self._match(TokenType.PERCENT),
1446            )
1447        else:
1448            return self.expression(
1449                exp.MergeBlockRatioProperty,
1450                no=no,
1451                default=default,
1452            )
1453
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse DATABLOCKSIZE in its DEFAULT, MIN/MAX, or explicit `= <size> [units]` forms."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            # min=False encodes the MAXIMUM variant.
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1472
1473    def _parse_blockcompression(self) -> exp.Expression:
1474        self._match_text_seq("BLOCKCOMPRESSION")
1475        self._match(TokenType.EQ)
1476        always = self._match_text_seq("ALWAYS")
1477        manual = self._match_text_seq("MANUAL")
1478        never = self._match_text_seq("NEVER")
1479        default = self._match_text_seq("DEFAULT")
1480        autotemp = None
1481        if self._match_text_seq("AUTOTEMP"):
1482            autotemp = self._parse_schema()
1483
1484        return self.expression(
1485            exp.BlockCompressionProperty,
1486            always=always,
1487            manual=manual,
1488            never=never,
1489            default=default,
1490            autotemp=autotemp,
1491        )
1492
1493    def _parse_withisolatedloading(self) -> exp.Expression:
1494        no = self._match_text_seq("NO")
1495        concurrent = self._match_text_seq("CONCURRENT")
1496        self._match_text_seq("ISOLATED", "LOADING")
1497        for_all = self._match_text_seq("FOR", "ALL")
1498        for_insert = self._match_text_seq("FOR", "INSERT")
1499        for_none = self._match_text_seq("FOR", "NONE")
1500        return self.expression(
1501            exp.IsolatedLoadingProperty,
1502            no=no,
1503            concurrent=concurrent,
1504            for_all=for_all,
1505            for_insert=for_insert,
1506            for_none=for_none,
1507        )
1508
1509    def _parse_locking(self) -> exp.Expression:
1510        if self._match(TokenType.TABLE):
1511            kind = "TABLE"
1512        elif self._match(TokenType.VIEW):
1513            kind = "VIEW"
1514        elif self._match(TokenType.ROW):
1515            kind = "ROW"
1516        elif self._match_text_seq("DATABASE"):
1517            kind = "DATABASE"
1518        else:
1519            kind = None
1520
1521        if kind in ("DATABASE", "TABLE", "VIEW"):
1522            this = self._parse_table_parts()
1523        else:
1524            this = None
1525
1526        if self._match(TokenType.FOR):
1527            for_or_in = "FOR"
1528        elif self._match(TokenType.IN):
1529            for_or_in = "IN"
1530        else:
1531            for_or_in = None
1532
1533        if self._match_text_seq("ACCESS"):
1534            lock_type = "ACCESS"
1535        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1536            lock_type = "EXCLUSIVE"
1537        elif self._match_text_seq("SHARE"):
1538            lock_type = "SHARE"
1539        elif self._match_text_seq("READ"):
1540            lock_type = "READ"
1541        elif self._match_text_seq("WRITE"):
1542            lock_type = "WRITE"
1543        elif self._match_text_seq("CHECKSUM"):
1544            lock_type = "CHECKSUM"
1545        else:
1546            lock_type = None
1547
1548        override = self._match_text_seq("OVERRIDE")
1549
1550        return self.expression(
1551            exp.LockingProperty,
1552            this=this,
1553            kind=kind,
1554            for_or_in=for_or_in,
1555            lock_type=lock_type,
1556            override=override,
1557        )
1558
1559    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1560        if self._match(TokenType.PARTITION_BY):
1561            return self._parse_csv(self._parse_conjunction)
1562        return []
1563
1564    def _parse_partitioned_by(self) -> exp.Expression:
1565        self._match(TokenType.EQ)
1566        return self.expression(
1567            exp.PartitionedByProperty,
1568            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1569        )
1570
1571    def _parse_withdata(self, no=False) -> exp.Expression:
1572        if self._match_text_seq("AND", "STATISTICS"):
1573            statistics = True
1574        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1575            statistics = False
1576        else:
1577            statistics = None
1578
1579        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1580
1581    def _parse_noprimaryindex(self) -> exp.Expression:
1582        self._match_text_seq("PRIMARY", "INDEX")
1583        return exp.NoPrimaryIndexProperty()
1584
1585    def _parse_oncommit(self) -> exp.Expression:
1586        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1587            return exp.OnCommitProperty()
1588        return exp.OnCommitProperty(delete=self._match_text_seq("COMMIT", "DELETE", "ROWS"))
1589
1590    def _parse_distkey(self) -> exp.Expression:
1591        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1592
    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse a CREATE ... LIKE clause: source table plus INCLUDING/EXCLUDING options.

        Returns None if an INCLUDING/EXCLUDING keyword is not followed by an identifier.
        """
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            # A dangling INCLUDING/EXCLUDING with no option name is malformed.
            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)
1611
1612    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1613        return self.expression(
1614            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1615        )
1616
1617    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1618        self._match(TokenType.EQ)
1619        return self.expression(
1620            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1621        )
1622
    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: either a scalar type or a TABLE definition.

        Handles both TABLE<col type, ...> (angle-bracket struct syntax) and
        TABLE (col type, ...) schema syntax.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<...> struct-style column list
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1642
    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Build a TemporaryProperty; ``global_`` marks a GLOBAL TEMPORARY property."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1646
    def _parse_describe(self) -> exp.Expression:
        """Parse a DESCRIBE statement.

        ``kind`` is the matched creatable keyword's text (e.g. TABLE) when one
        is present; otherwise it is False (not None).
        """
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)
1652
    def _parse_insert(self) -> exp.Expression:
        """Parse the body of an INSERT statement into an exp.Insert node."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [OVERWRITE] [LOCAL] DIRECTORY 'path' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR <alternative> (alternatives come from INSERT_ALTERNATIVES),
            # then the optional INTO [TABLE] and the target table.
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1684
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... following an INSERT.

        Returns None when neither form is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Either ON CONSTRAINT <name> or a conflict target key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = b, ...
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1718
1719    def _parse_returning(self) -> t.Optional[exp.Expression]:
1720        if not self._match(TokenType.RETURNING):
1721            return None
1722
1723        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1724
1725    def _parse_row(self) -> t.Optional[exp.Expression]:
1726        if not self._match(TokenType.FORMAT):
1727            return None
1728        return self._parse_row_format()
1729
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT clause (SERDE or DELIMITED).

        When ``match_row`` is True the leading ROW FORMAT tokens themselves must
        be present, otherwise None is returned.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each delimiter sub-clause is optional; collect whichever ones appear, in order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1755
1756    def _parse_load_data(self) -> exp.Expression:
1757        local = self._match(TokenType.LOCAL)
1758        self._match_text_seq("INPATH")
1759        inpath = self._parse_string()
1760        overwrite = self._match(TokenType.OVERWRITE)
1761        self._match_pair(TokenType.INTO, TokenType.TABLE)
1762
1763        return self.expression(
1764            exp.LoadData,
1765            this=self._parse_table(schema=True),
1766            local=local,
1767            overwrite=overwrite,
1768            inpath=inpath,
1769            partition=self._parse_partition(),
1770            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1771            serde=self._match_text_seq("SERDE") and self._parse_string(),
1772        )
1773
1774    def _parse_delete(self) -> exp.Expression:
1775        self._match(TokenType.FROM)
1776
1777        return self.expression(
1778            exp.Delete,
1779            this=self._parse_table(),
1780            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1781            where=self._parse_where(),
1782            returning=self._parse_returning(),
1783        )
1784
1785    def _parse_update(self) -> exp.Expression:
1786        return self.expression(
1787            exp.Update,
1788            **{  # type: ignore
1789                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1790                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1791                "from": self._parse_from(modifiers=True),
1792                "where": self._parse_where(),
1793                "returning": self._parse_returning(),
1794            },
1795        )
1796
1797    def _parse_uncache(self) -> exp.Expression:
1798        if not self._match(TokenType.TABLE):
1799            self.raise_error("Expecting TABLE after UNCACHE")
1800
1801        return self.expression(
1802            exp.Uncache,
1803            exists=self._parse_exists(),
1804            this=self._parse_table(schema=True),
1805        )
1806
    def _parse_cache(self) -> exp.Expression:
        """Parse a CACHE [LAZY] TABLE t [OPTIONS ('k' = 'v')] [AS select] statement."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            # NOTE(review): only a single 'key' = 'value' pair is parsed here —
            # confirm whether multiple OPTIONS pairs should be supported.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
1829
1830    def _parse_partition(self) -> t.Optional[exp.Expression]:
1831        if not self._match(TokenType.PARTITION):
1832            return None
1833
1834        return self.expression(
1835            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1836        )
1837
1838    def _parse_value(self) -> exp.Expression:
1839        if self._match(TokenType.L_PAREN):
1840            expressions = self._parse_csv(self._parse_conjunction)
1841            self._match_r_paren()
1842            return self.expression(exp.Tuple, expressions=expressions)
1843
1844        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1845        # Source: https://prestodb.io/docs/current/sql/values.html
1846        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1847
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a query: WITH + statement, SELECT, a parenthesized subquery
        (only when ``nested`` or ``table``), or VALUES — then trailing set
        operations (UNION etc.). Returns None when none of these forms match.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # reached only if raise_error did not raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE modifier.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1927
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        ``skip_with_token`` indicates the WITH token was already consumed by the caller.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Stop when neither a comma nor another WITH separates the CTEs;
            # a WITH after a comma is also tolerated and consumed.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1947
1948    def _parse_cte(self) -> exp.Expression:
1949        alias = self._parse_table_alias()
1950        if not alias or not alias.this:
1951            self.raise_error("Expected CTE to have alias")
1952
1953        self._match(TokenType.ALIAS)
1954
1955        return self.expression(
1956            exp.CTE,
1957            this=self._parse_wrapped(self._parse_statement),
1958            alias=alias,
1959        )
1960
1961    def _parse_table_alias(
1962        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1963    ) -> t.Optional[exp.Expression]:
1964        any_token = self._match(TokenType.ALIAS)
1965        alias = (
1966            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1967            or self._parse_string_as_identifier()
1968        )
1969
1970        index = self._index
1971        if self._match(TokenType.L_PAREN):
1972            columns = self._parse_csv(self._parse_function_parameter)
1973            self._match_r_paren() if columns else self._retreat(index)
1974        else:
1975            columns = None
1976
1977        if not alias and not columns:
1978            return None
1979
1980        return self.expression(exp.TableAlias, this=alias, columns=columns)
1981
1982    def _parse_subquery(
1983        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1984    ) -> exp.Expression:
1985        return self.expression(
1986            exp.Subquery,
1987            this=this,
1988            pivots=self._parse_pivots(),
1989            alias=self._parse_table_alias() if parse_alias else None,
1990        )
1991
1992    def _parse_query_modifiers(
1993        self, this: t.Optional[exp.Expression]
1994    ) -> t.Optional[exp.Expression]:
1995        if isinstance(this, self.MODIFIABLES):
1996            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1997                expression = parser(self)
1998
1999                if expression:
2000                    this.set(key, expression)
2001        return this
2002
2003    def _parse_hint(self) -> t.Optional[exp.Expression]:
2004        if self._match(TokenType.HINT):
2005            hints = self._parse_csv(self._parse_function)
2006            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2007                self.raise_error("Expected */ after HINT")
2008            return self.expression(exp.Hint, expressions=hints)
2009
2010        return None
2011
2012    def _parse_into(self) -> t.Optional[exp.Expression]:
2013        if not self._match(TokenType.INTO):
2014            return None
2015
2016        temp = self._match(TokenType.TEMPORARY)
2017        unlogged = self._match(TokenType.UNLOGGED)
2018        self._match(TokenType.TABLE)
2019
2020        return self.expression(
2021            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2022        )
2023
2024    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2025        if not self._match(TokenType.FROM):
2026            return None
2027
2028        comments = self._prev_comments
2029        this = self._parse_table()
2030
2031        return self.expression(
2032            exp.From,
2033            comments=comments,
2034            this=self._parse_query_modifiers(this) if modifiers else this,
2035        )
2036
2037    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2038        if not self._match(TokenType.MATCH_RECOGNIZE):
2039            return None
2040
2041        self._match_l_paren()
2042
2043        partition = self._parse_partition_by()
2044        order = self._parse_order()
2045        measures = (
2046            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2047        )
2048
2049        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2050            rows = exp.Var(this="ONE ROW PER MATCH")
2051        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2052            text = "ALL ROWS PER MATCH"
2053            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2054                text += f" SHOW EMPTY MATCHES"
2055            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2056                text += f" OMIT EMPTY MATCHES"
2057            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2058                text += f" WITH UNMATCHED ROWS"
2059            rows = exp.Var(this=text)
2060        else:
2061            rows = None
2062
2063        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2064            text = "AFTER MATCH SKIP"
2065            if self._match_text_seq("PAST", "LAST", "ROW"):
2066                text += f" PAST LAST ROW"
2067            elif self._match_text_seq("TO", "NEXT", "ROW"):
2068                text += f" TO NEXT ROW"
2069            elif self._match_text_seq("TO", "FIRST"):
2070                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2071            elif self._match_text_seq("TO", "LAST"):
2072                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2073            after = exp.Var(this=text)
2074        else:
2075            after = None
2076
2077        if self._match_text_seq("PATTERN"):
2078            self._match_l_paren()
2079
2080            if not self._curr:
2081                self.raise_error("Expecting )", self._curr)
2082
2083            paren = 1
2084            start = self._curr
2085
2086            while self._curr and paren > 0:
2087                if self._curr.token_type == TokenType.L_PAREN:
2088                    paren += 1
2089                if self._curr.token_type == TokenType.R_PAREN:
2090                    paren -= 1
2091                end = self._prev
2092                self._advance()
2093            if paren > 0:
2094                self.raise_error("Expecting )", self._curr)
2095            pattern = exp.Var(this=self._find_sql(start, end))
2096        else:
2097            pattern = None
2098
2099        define = (
2100            self._parse_csv(
2101                lambda: self.expression(
2102                    exp.Alias,
2103                    alias=self._parse_id_var(any_token=True),
2104                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2105                )
2106            )
2107            if self._match_text_seq("DEFINE")
2108            else None
2109        )
2110
2111        self._match_r_paren()
2112
2113        return self.expression(
2114            exp.MatchRecognize,
2115            partition_by=partition,
2116            order=order,
2117            measures=measures,
2118            rows=rows,
2119            after=after,
2120            pattern=pattern,
2121            define=define,
2122            alias=self._parse_table_alias(),
2123        )
2124
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse a LATERAL / OUTER APPLY / CROSS APPLY clause.

        Returns None when none of these introducers is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW f(...) tbl [AS col1, col2, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2166
2167    def _parse_join_side_and_kind(
2168        self,
2169    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2170        return (
2171            self._match(TokenType.NATURAL) and self._prev,
2172            self._match_set(self.JOIN_SIDES) and self._prev,
2173            self._match_set(self.JOIN_KINDS) and self._prev,
2174        )
2175
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause, including comma joins and OUTER/CROSS APPLY.

        Returns None when no join follows. ``skip_join_token`` means the JOIN
        keyword was already consumed by the caller.
        """
        # A bare comma acts as an implicit join separator.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The side/kind tokens weren't part of a join after all — rewind.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        # OUTER APPLY is modeled as a LEFT join.
        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2219
2220    def _parse_index(self) -> exp.Expression:
2221        index = self._parse_id_var()
2222        self._match(TokenType.ON)
2223        self._match(TokenType.TABLE)  # hive
2224
2225        return self.expression(
2226            exp.Index,
2227            this=index,
2228            table=self.expression(exp.Table, this=self._parse_id_var()),
2229            columns=self._parse_expression(),
2230        )
2231
    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an index definition inside CREATE TABLE: [UNIQUE|PRIMARY|AMP] INDEX name (cols)."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        # NOTE(review): any UNIQUE/PRIMARY/AMP tokens consumed above are not
        # rewound when INDEX does not follow — confirm callers expect this.
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        # Peek (advance=False) for a parenthesized column list.
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )
2250
2251    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2252        return (
2253            (not schema and self._parse_function())
2254            or self._parse_id_var(any_token=False)
2255            or self._parse_string_as_identifier()
2256            or self._parse_placeholder()
2257        )
2258
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a dotted table reference into catalog.db.table parts.

        More than three dot-separated parts are folded into nested Dot
        expressions on the table slot.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: table -> db -> catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2281
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery or a plain table.

        ``schema`` parses the result as a schema definition; ``alias_tokens``
        overrides the token set allowed as aliases.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (hint, ...) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # A sample wraps the table node it applies to.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2333
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table factor with optional ordinality, alias and offset."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only dialects the alias names the column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2360
2361    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2362        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2363        if not is_derived and not self._match(TokenType.VALUES):
2364            return None
2365
2366        expressions = self._parse_csv(self._parse_value)
2367
2368        if is_derived:
2369            self._match_r_paren()
2370
2371        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2372
2373    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2374        if not self._match(TokenType.TABLE_SAMPLE) and not (
2375            as_modifier and self._match_text_seq("USING", "SAMPLE")
2376        ):
2377            return None
2378
2379        bucket_numerator = None
2380        bucket_denominator = None
2381        bucket_field = None
2382        percent = None
2383        rows = None
2384        size = None
2385        seed = None
2386
2387        kind = (
2388            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2389        )
2390        method = self._parse_var(tokens=(TokenType.ROW,))
2391
2392        self._match(TokenType.L_PAREN)
2393
2394        num = self._parse_number()
2395
2396        if self._match(TokenType.BUCKET):
2397            bucket_numerator = self._parse_number()
2398            self._match(TokenType.OUT_OF)
2399            bucket_denominator = bucket_denominator = self._parse_number()
2400            self._match(TokenType.ON)
2401            bucket_field = self._parse_field()
2402        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2403            percent = num
2404        elif self._match(TokenType.ROWS):
2405            rows = num
2406        else:
2407            size = num
2408
2409        self._match(TokenType.R_PAREN)
2410
2411        if self._match(TokenType.L_PAREN):
2412            method = self._parse_var()
2413            seed = self._match(TokenType.COMMA) and self._parse_number()
2414            self._match_r_paren()
2415        elif self._match_texts(("SEED", "REPEATABLE")):
2416            seed = self._parse_wrapped(self._parse_number)
2417
2418        return self.expression(
2419            exp.TableSample,
2420            method=method,
2421            bucket_numerator=bucket_numerator,
2422            bucket_denominator=bucket_denominator,
2423            bucket_field=bucket_field,
2424            percent=percent,
2425            rows=rows,
2426            size=size,
2427            seed=seed,
2428            kind=kind,
2429        )
2430
2431    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2432        return list(iter(self._parse_pivot, None))
2433
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT/UNPIVOT clause into exp.Pivot, or return None.

        For PIVOT (not UNPIVOT), output column names are synthesized from the
        IN-list values combined with the aggregation aliases and stored under
        the "columns" arg.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT/UNPIVOT must be followed by a parenthesized body; otherwise
        # rewind so the token can be reinterpreted (e.g. as an identifier).
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # Reuse the IN-predicate parser for the `FOR col IN (...)` part.
        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias when another PIVOT/UNPIVOT doesn't follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect flags decide whether the aggregation alias is
                    # prefixed or suffixed onto the pivoted value's name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2493
2494    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2495        return [agg.alias for agg in aggregations]
2496
2497    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2498        if not skip_where_token and not self._match(TokenType.WHERE):
2499            return None
2500
2501        return self.expression(
2502            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2503        )
2504
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause into exp.Group.

        Loops to collect plain grouping expressions plus any GROUPING SETS /
        ROLLUP / CUBE / WITH TOTALS modifiers, since these may be interleaved.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP(...) / CUBE(...) store columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while a modifier was consumed this pass.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2541
2542    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2543        if not self._match(TokenType.GROUPING_SETS):
2544            return None
2545
2546        return self._parse_wrapped_csv(self._parse_grouping_set)
2547
2548    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2549        if self._match(TokenType.L_PAREN):
2550            grouping_set = self._parse_csv(self._parse_column)
2551            self._match_r_paren()
2552            return self.expression(exp.Tuple, expressions=grouping_set)
2553
2554        return self._parse_column()
2555
2556    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2557        if not skip_having_token and not self._match(TokenType.HAVING):
2558            return None
2559        return self.expression(exp.Having, this=self._parse_conjunction())
2560
2561    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2562        if not self._match(TokenType.QUALIFY):
2563            return None
2564        return self.expression(exp.Qualify, this=self._parse_conjunction())
2565
2566    def _parse_order(
2567        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2568    ) -> t.Optional[exp.Expression]:
2569        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2570            return this
2571
2572        return self.expression(
2573            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2574        )
2575
2576    def _parse_sort(
2577        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2578    ) -> t.Optional[exp.Expression]:
2579        if not self._match(token_type):
2580            return None
2581        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2582
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY element: expression [ASC|DESC] [NULLS FIRST|LAST]."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # When the query doesn't say where NULLs go, make the source dialect's
        # implicit default (self.null_ordering) explicit so the ordering
        # survives transpilation to dialects with a different default.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2604
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top` is True) or an ANSI FETCH clause.

        Returns exp.Limit / exp.Fetch, or `this` unchanged when neither follows.
        Raises a ParseError via raise_error for FETCH ... ONLY WITH TIES.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP only takes a number; LIMIT accepts a general term (e.g. LIMIT 1 + 1).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            # ROW/ROWS is noise here; consume it either way.
            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2643
2644    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2645        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2646            return this
2647
2648        count = self._parse_number()
2649        self._match_set((TokenType.ROW, TokenType.ROWS))
2650        return self.expression(exp.Offset, this=this, expression=count)
2651
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more row-locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) into a list of exp.Lock expressions."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            # Optional OF <tables> list restricting which tables are locked.
            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three variants: True = NOWAIT, False = SKIP LOCKED,
            # expression = WAIT <n>.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2683
2684    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2685        if not self._match_set(self.SET_OPERATIONS):
2686            return this
2687
2688        token_type = self._prev.token_type
2689
2690        if token_type == TokenType.UNION:
2691            expression = exp.Union
2692        elif token_type == TokenType.EXCEPT:
2693            expression = exp.Except
2694        else:
2695            expression = exp.Intersect
2696
2697        return self.expression(
2698            expression,
2699            this=this,
2700            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2701            expression=self._parse_set_operations(self._parse_select(nested=True)),
2702        )
2703
2704    def _parse_expression(self) -> t.Optional[exp.Expression]:
2705        return self._parse_alias(self._parse_conjunction())
2706
2707    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2708        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2709
2710    def _parse_equality(self) -> t.Optional[exp.Expression]:
2711        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2712
2713    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2714        return self._parse_tokens(self._parse_range, self.COMPARISON)
2715
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...) plus IS/ISNULL/NOTNULL."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # A NOT consumed earlier negates the whole predicate, e.g. x NOT BETWEEN a AND b.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2742
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate (IS [NOT] DISTINCT FROM / NULL / boolean).

        Returns None — after rewinding to before the IS token — when what
        follows is not a recognized IS operand.
        """
        # self._index - 1 points at the already-consumed IS token, so a retreat
        # leaves it unconsumed for other parsers.
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM is a null-safe equality, hence the inversion.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2757
2758    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2759        unnest = self._parse_unnest()
2760        if unnest:
2761            this = self.expression(exp.In, this=this, unnest=unnest)
2762        elif self._match(TokenType.L_PAREN):
2763            expressions = self._parse_csv(self._parse_select_or_expression)
2764
2765            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2766                this = self.expression(exp.In, this=this, query=expressions[0])
2767            else:
2768                this = self.expression(exp.In, this=this, expressions=expressions)
2769
2770            self._match_r_paren(this)
2771        else:
2772            this = self.expression(exp.In, this=this, field=self._parse_field())
2773
2774        return this
2775
2776    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2777        low = self._parse_bitwise()
2778        self._match(TokenType.AND)
2779        high = self._parse_bitwise()
2780        return self.expression(exp.Between, this=this, low=low, high=high)
2781
2782    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2783        if not self._match(TokenType.ESCAPE):
2784            return this
2785        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2786
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing toward INTERVAL '<value>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split e.g. '5 day' into value '5' plus unit `day`.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2807
2808    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2809        this = self._parse_term()
2810
2811        while True:
2812            if self._match_set(self.BITWISE):
2813                this = self.expression(
2814                    self.BITWISE[self._prev.token_type],
2815                    this=this,
2816                    expression=self._parse_term(),
2817                )
2818            elif self._match_pair(TokenType.LT, TokenType.LT):
2819                this = self.expression(
2820                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2821                )
2822            elif self._match_pair(TokenType.GT, TokenType.GT):
2823                this = self.expression(
2824                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2825                )
2826            else:
2827                break
2828
2829        return this
2830
2831    def _parse_term(self) -> t.Optional[exp.Expression]:
2832        return self._parse_tokens(self._parse_factor, self.TERM)
2833
2834    def _parse_factor(self) -> t.Optional[exp.Expression]:
2835        return self._parse_tokens(self._parse_unary, self.FACTOR)
2836
2837    def _parse_unary(self) -> t.Optional[exp.Expression]:
2838        if self._match_set(self.UNARY_PARSERS):
2839            return self.UNARY_PARSERS[self._prev.token_type](self)
2840        return self._parse_at_time_zone(self._parse_type())
2841
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `TYPE literal` cast form, a bare type, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01': dialect-specific literal parsers take
                # precedence; otherwise treat it as a cast of the literal.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was likely a column
                # reference after all; rewind and reparse it as one.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2863
2864    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2865        this = self._parse_type()
2866        if not this:
2867            return None
2868
2869        return self.expression(
2870            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2871        )
2872
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into exp.DataType, or return None after rewinding.

        Handles nested types (e.g. ARRAY<...>, STRUCT<...>), parenthesized
        sizes (VARCHAR(10)), bracketed array suffixes (INT[]), timestamp
        time-zone modifiers, and INTERVAL units.

        Args:
            check_func: when True, a `TYPE(...)` form is only accepted as a type
                if it is followed by a string literal; otherwise it is assumed
                to be a function call and the parser rewinds.
        """
        index = self._index

        # Teradata-specific prefix, e.g. SYSUDTLIB.<type>
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) might still turn out to be a function call (see check_func).
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix: each trailing [] adds an ARRAY level.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ with no matching ] means this wasn't an array type; rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE modifiers onto the TZ-specific types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a concrete timestamp type was chosen, it can't be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # No trailing string literal: assume TYPE(...) was a function call.
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2985
2986    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2987        this = self._parse_type() or self._parse_id_var()
2988        self._match(TokenType.COLON)
2989        return self._parse_column_def(this)
2990
2991    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2992        if not self._match(TokenType.AT_TIME_ZONE):
2993            return this
2994        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2995
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified column reference, including ::casts,
        column operators, and bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: col::TYPE
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a registered handler take the next token's
                # text as a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers over: what was parsed as the column name
                # becomes the table (and table -> db, db -> catalog) now that
                # another dotted part has appeared.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3048
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.N` number, or a
        parenthesized expression / tuple / subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b').
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # A dot directly before a number is a float without an integer part, e.g. .5
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple, e.g. (1, 2).
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3092
3093    def _parse_field(
3094        self,
3095        any_token: bool = False,
3096        tokens: t.Optional[t.Collection[TokenType]] = None,
3097    ) -> t.Optional[exp.Expression]:
3098        return (
3099            self._parse_primary()
3100            or self._parse_function()
3101            or self._parse_id_var(any_token=any_token, tokens=tokens)
3102        )
3103
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to dialect-specific builders.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, skip known-function builders and produce
                exp.Anonymous instead.

        Returns:
            The parsed function (possibly wrapped via _parse_window), or None.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed without parentheses, via their registered parsers.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # Subquery predicates, e.g. EXISTS(SELECT ...) / EXISTS(WITH ...).
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown names become exp.Anonymous, preserving the call as-is.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3155
3156    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3157        return self._parse_column_def(self._parse_id_var())
3158
3159    def _parse_user_defined_function(
3160        self, kind: t.Optional[TokenType] = None
3161    ) -> t.Optional[exp.Expression]:
3162        this = self._parse_id_var()
3163
3164        while self._match(TokenType.DOT):
3165            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3166
3167        if not self._match(TokenType.L_PAREN):
3168            return this
3169
3170        expressions = self._parse_csv(self._parse_function_parameter)
3171        self._match_r_paren()
3172        return self.expression(
3173            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3174        )
3175
3176    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3177        literal = self._parse_primary()
3178        if literal:
3179            return self.expression(exp.Introducer, this=token.text, expression=literal)
3180
3181        return self.expression(exp.Identifier, this=token.text)
3182
3183    def _parse_national(self, token: Token) -> exp.Expression:
3184        return self.expression(exp.National, this=exp.Literal.string(token.text))
3185
3186    def _parse_session_parameter(self) -> exp.Expression:
3187        kind = None
3188        this = self._parse_id_var() or self._parse_primary()
3189
3190        if this and self._match(TokenType.DOT):
3191            kind = this.name
3192            this = self._parse_var() or self._parse_primary()
3193
3194        return self.expression(exp.SessionParameter, this=this, kind=kind)
3195
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `x -> x + 1` or `(a, b) -> a + b`).

        If no lambda operator follows the candidate parameter list, the parser
        backtracks and parses a regular (possibly DISTINCT) expression instead.
        """
        # Remember where we started so we can backtrack if this isn't a lambda.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all - backtrack.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # A lambda operator follows; delegate to its registered parser.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator: undo everything and parse a plain expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # `name = value` style arguments: the LHS is a name, not a column.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3227
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        If the parenthesized content is actually a nested SELECT, no schema is
        parsed and `this` is returned unchanged.
        """
        index = self._index

        try:
            # Probe for a nested SELECT; if one parses, this isn't a schema.
            if self._parse_select(nested=True):
                return this
        except Exception:
            # The probe failed to parse - fall through and try a schema instead.
            pass
        finally:
            # Always rewind: the probe (successful or not) must not consume tokens.
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3248
3249    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3250        # column defs are not really columns, they're identifiers
3251        if isinstance(this, exp.Column):
3252            this = this.this
3253        kind = self._parse_types()
3254
3255        if self._match_text_seq("FOR", "ORDINALITY"):
3256            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3257
3258        constraints = []
3259        while True:
3260            constraint = self._parse_column_constraint()
3261            if not constraint:
3262                break
3263            constraints.append(constraint)
3264
3265        if not kind and not constraints:
3266            return this
3267
3268        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3269
3270    def _parse_auto_increment(self) -> exp.Expression:
3271        start = None
3272        increment = None
3273
3274        if self._match(TokenType.L_PAREN, advance=False):
3275            args = self._parse_wrapped_csv(self._parse_bitwise)
3276            start = seq_get(args, 0)
3277            increment = seq_get(args, 1)
3278        elif self._match_text_seq("START"):
3279            start = self._parse_bitwise()
3280            self._match_text_seq("INCREMENT")
3281            increment = self._parse_bitwise()
3282
3283        if start and increment:
3284            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3285
3286        return exp.AutoIncrementColumnConstraint()
3287
3288    def _parse_compress(self) -> exp.Expression:
3289        if self._match(TokenType.L_PAREN, advance=False):
3290            return self.expression(
3291                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3292            )
3293
3294        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3295
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse a GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY column constraint,
        including its optional parenthesized sequence options."""
        if self._match(TokenType.BY_DEFAULT):
            # GENERATED BY DEFAULT [ON NULL] AS IDENTITY
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # GENERATED [ALWAYS] AS IDENTITY
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 1 ... [NO] CYCLE).
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3325
3326    def _parse_inline(self) -> t.Optional[exp.Expression]:
3327        self._match_text_seq("LENGTH")
3328        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3329
3330    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3331        if self._match_text_seq("NULL"):
3332            return self.expression(exp.NotNullColumnConstraint)
3333        if self._match_text_seq("CASESPECIFIC"):
3334            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3335        return None
3336
3337    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3338        if self._match(TokenType.CONSTRAINT):
3339            this = self._parse_id_var()
3340        else:
3341            this = None
3342
3343        if self._match_texts(self.CONSTRAINT_PARSERS):
3344            return self.expression(
3345                exp.ColumnConstraint,
3346                this=this,
3347                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3348            )
3349
3350        return this
3351
3352    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3353        if not self._match(TokenType.CONSTRAINT):
3354            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3355
3356        this = self._parse_id_var()
3357        expressions = []
3358
3359        while True:
3360            constraint = self._parse_unnamed_constraint() or self._parse_function()
3361            if not constraint:
3362                break
3363            expressions.append(constraint)
3364
3365        return self.expression(exp.Constraint, this=this, expressions=expressions)
3366
3367    def _parse_unnamed_constraint(
3368        self, constraints: t.Optional[t.Collection[str]] = None
3369    ) -> t.Optional[exp.Expression]:
3370        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3371            return None
3372
3373        constraint = self._prev.text.upper()
3374        if constraint not in self.CONSTRAINT_PARSERS:
3375            self.raise_error(f"No parser found for schema constraint {constraint}.")
3376
3377        return self.CONSTRAINT_PARSERS[constraint](self)
3378
3379    def _parse_unique(self) -> exp.Expression:
3380        if not self._match(TokenType.L_PAREN, advance=False):
3381            return self.expression(exp.UniqueColumnConstraint)
3382        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3383
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        DEFERRABLE, MATCH FULL, etc.) as plain strings, in source order."""
        options = []
        while True:
            if not self._curr:
                # End of token stream.
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option follows - stop collecting.
                break

        return options
3420
3421    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3422        if match and not self._match(TokenType.REFERENCES):
3423            return None
3424
3425        expressions = None
3426        this = self._parse_id_var()
3427
3428        if self._match(TokenType.L_PAREN, advance=False):
3429            expressions = self._parse_wrapped_id_vars()
3430
3431        options = self._parse_key_constraint_options()
3432        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3433
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause and
        any ON DELETE / ON UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" or "update" - used as the keyword argument name below.
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3459
3460    def _parse_primary_key(self) -> exp.Expression:
3461        desc = (
3462            self._match_set((TokenType.ASC, TokenType.DESC))
3463            and self._prev.token_type == TokenType.DESC
3464        )
3465
3466        if not self._match(TokenType.L_PAREN, advance=False):
3467            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3468
3469        expressions = self._parse_wrapped_csv(self._parse_field)
3470        options = self._parse_key_constraint_options()
3471        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3472
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array/struct literals or subscripts like a[0].

        Recurses at the end so chained subscripts (a[0][1]) are handled.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        # Remember which opener we saw so we can require the matching closer.
        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. a[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices for the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3501
3502    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3503        if self._match(TokenType.COLON):
3504            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3505        return this
3506
3507    def _parse_case(self) -> t.Optional[exp.Expression]:
3508        ifs = []
3509        default = None
3510
3511        expression = self._parse_conjunction()
3512
3513        while self._match(TokenType.WHEN):
3514            this = self._parse_conjunction()
3515            self._match(TokenType.THEN)
3516            then = self._parse_conjunction()
3517            ifs.append(self.expression(exp.If, this=this, true=then))
3518
3519        if self._match(TokenType.ELSE):
3520            default = self._parse_conjunction()
3521
3522        if not self._match(TokenType.END):
3523            self.raise_error("Expected END after CASE", self._prev)
3524
3525        return self._parse_window(
3526            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3527        )
3528
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in both the function form IF(cond, t [, f]) and the
        statement-like form IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            # Function form: IF(condition, true [, false]).
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # The IF token was already consumed; remember its position.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all - backtrack to the IF token.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3550
3551    def _parse_extract(self) -> exp.Expression:
3552        this = self._parse_function() or self._parse_var() or self._parse_type()
3553
3554        if self._match(TokenType.FROM):
3555            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3556
3557        if not self._match(TokenType.COMMA):
3558            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3559
3560        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3561
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the body of CAST(expr AS type).

        Args:
            strict: when True produce exp.Cast, otherwise exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'format') form: cast to a string type.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit character set, e.g. CHAR CHARACTER SET utf8.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3582
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into an exp.GroupConcat,
        handling DISTINCT, an inline ORDER BY, and WITHIN GROUP syntax."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            # Only the first argument is aggregated; wrap it in DISTINCT.
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            # No WITHIN GROUP: rewind past the R_PAREN consumed above.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3611
3612    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3613        to: t.Optional[exp.Expression]
3614        this = self._parse_bitwise()
3615
3616        if self._match(TokenType.USING):
3617            to = self.expression(exp.CharacterSet, this=self._parse_var())
3618        elif self._match(TokenType.COMMA):
3619            to = self._parse_bitwise()
3620        else:
3621            to = None
3622
3623        # Swap the argument order if needed to produce the correct AST
3624        if self.CONVERT_TYPE_FIRST:
3625            this, to = to, this
3626
3627        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3628
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: plain charset decoding.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL search values must be compared with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3675
3676    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3677        self._match_text_seq("KEY")
3678        key = self._parse_field()
3679        self._match(TokenType.COLON)
3680        self._match_text_seq("VALUE")
3681        value = self._parse_field()
3682        if not key and not value:
3683            return None
3684        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3685
3686    def _parse_json_object(self) -> exp.Expression:
3687        expressions = self._parse_csv(self._parse_json_key_value)
3688
3689        null_handling = None
3690        if self._match_text_seq("NULL", "ON", "NULL"):
3691            null_handling = "NULL ON NULL"
3692        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3693            null_handling = "ABSENT ON NULL"
3694
3695        unique_keys = None
3696        if self._match_text_seq("WITH", "UNIQUE"):
3697            unique_keys = True
3698        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3699            unique_keys = False
3700
3701        self._match_text_seq("KEYS")
3702
3703        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3704        format_json = self._match_text_seq("FORMAT", "JSON")
3705        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3706
3707        return self.expression(
3708            exp.JSONObject,
3709            expressions=expressions,
3710            null_handling=null_handling,
3711            unique_keys=unique_keys,
3712            return_type=return_type,
3713            format_json=format_json,
3714            encoding=encoding,
3715        )
3716
3717    def _parse_logarithm(self) -> exp.Expression:
3718        # Default argument order is base, expression
3719        args = self._parse_csv(self._parse_range)
3720
3721        if len(args) > 1:
3722            if not self.LOG_BASE_FIRST:
3723                args.reverse()
3724            return exp.Log.from_arg_list(args)
3725
3726        return self.expression(
3727            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3728        )
3729
3730    def _parse_match_against(self) -> exp.Expression:
3731        expressions = self._parse_csv(self._parse_column)
3732
3733        self._match_text_seq(")", "AGAINST", "(")
3734
3735        this = self._parse_string()
3736
3737        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3738            modifier = "IN NATURAL LANGUAGE MODE"
3739            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3740                modifier = f"{modifier} WITH QUERY EXPANSION"
3741        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3742            modifier = "IN BOOLEAN MODE"
3743        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3744            modifier = "WITH QUERY EXPANSION"
3745        else:
3746            modifier = None
3747
3748        return self.expression(
3749            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3750        )
3751
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL's OPENJSON(json [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        # Optional JSON path argument after a comma.
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: name, type, optional path, optional AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The call's closing paren immediately followed by WITH starts the schema.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3772
3773    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3774        args = self._parse_csv(self._parse_bitwise)
3775
3776        if self._match(TokenType.IN):
3777            return self.expression(
3778                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3779            )
3780
3781        if haystack_first:
3782            haystack = seq_get(args, 0)
3783            needle = seq_get(args, 1)
3784        else:
3785            needle = seq_get(args, 0)
3786            haystack = seq_get(args, 1)
3787
3788        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3789
3790        self.validate_expression(this, args)
3791
3792        return this
3793
3794    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3795        args = self._parse_csv(self._parse_table)
3796        return exp.JoinHint(this=func_name.upper(), expressions=args)
3797
3798    def _parse_substring(self) -> exp.Expression:
3799        # Postgres supports the form: substring(string [from int] [for int])
3800        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3801
3802        args = self._parse_csv(self._parse_bitwise)
3803
3804        if self._match(TokenType.FROM):
3805            args.append(self._parse_bitwise())
3806            if self._match(TokenType.FOR):
3807                args.append(self._parse_bitwise())
3808
3809        this = exp.Substring.from_arg_list(args)
3810        self.validate_expression(this, args)
3811
3812        return this
3813
3814    def _parse_trim(self) -> exp.Expression:
3815        # https://www.w3resource.com/sql/character-functions/trim.php
3816        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3817
3818        position = None
3819        collation = None
3820
3821        if self._match_set(self.TRIM_TYPES):
3822            position = self._prev.text.upper()
3823
3824        expression = self._parse_bitwise()
3825        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3826            this = self._parse_bitwise()
3827        else:
3828            this = expression
3829            expression = None
3830
3831        if self._match(TokenType.COLLATE):
3832            collation = self._parse_bitwise()
3833
3834        return self.expression(
3835            exp.Trim,
3836            this=this,
3837            position=position,
3838            expression=expression,
3839            collation=collation,
3840        )
3841
3842    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3843        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3844
3845    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3846        return self._parse_window(self._parse_id_var(), alias=True)
3847
3848    def _parse_respect_or_ignore_nulls(
3849        self, this: t.Optional[exp.Expression]
3850    ) -> t.Optional[exp.Expression]:
3851        if self._match(TokenType.IGNORE_NULLS):
3852            return self.expression(exp.IgnoreNulls, this=this)
3853        if self._match(TokenType.RESPECT_NULLS):
3854            return self.expression(exp.RespectNulls, this=this)
3855        return this
3856
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function trailers on `this`: FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...) / named-window specs.

        Args:
            this: the expression (usually a function call) being windowed.
            alias: True when parsing a named window definition (`name AS (...)`).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER (or dialect equivalent): not a window expression.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FROM FIRST / FROM LAST modifiers (e.g. for NTH_VALUE).
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS|RANGE BETWEEN <bound> AND <bound>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3939
3940    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3941        self._match(TokenType.BETWEEN)
3942
3943        return {
3944            "value": (
3945                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3946            )
3947            or self._parse_bitwise(),
3948            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3949        }
3950
3951    def _parse_alias(
3952        self, this: t.Optional[exp.Expression], explicit: bool = False
3953    ) -> t.Optional[exp.Expression]:
3954        any_token = self._match(TokenType.ALIAS)
3955
3956        if explicit and not any_token:
3957            return this
3958
3959        if self._match(TokenType.L_PAREN):
3960            aliases = self.expression(
3961                exp.Aliases,
3962                this=this,
3963                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3964            )
3965            self._match_r_paren(aliases)
3966            return aliases
3967
3968        alias = self._parse_id_var(any_token)
3969
3970        if alias:
3971            return self.expression(exp.Alias, this=this, alias=alias)
3972
3973        return this
3974
3975    def _parse_id_var(
3976        self,
3977        any_token: bool = True,
3978        tokens: t.Optional[t.Collection[TokenType]] = None,
3979        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3980    ) -> t.Optional[exp.Expression]:
3981        identifier = self._parse_identifier()
3982
3983        if identifier:
3984            return identifier
3985
3986        prefix = ""
3987
3988        if prefix_tokens:
3989            while self._match_set(prefix_tokens):
3990                prefix += self._prev.text
3991
3992        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3993            quoted = self._prev.token_type == TokenType.STRING
3994            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3995
3996        return None
3997
3998    def _parse_string(self) -> t.Optional[exp.Expression]:
3999        if self._match(TokenType.STRING):
4000            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4001        return self._parse_placeholder()
4002
4003    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4004        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4005
4006    def _parse_number(self) -> t.Optional[exp.Expression]:
4007        if self._match(TokenType.NUMBER):
4008            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4009        return self._parse_placeholder()
4010
4011    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4012        if self._match(TokenType.IDENTIFIER):
4013            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4014        return self._parse_placeholder()
4015
4016    def _parse_var(
4017        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4018    ) -> t.Optional[exp.Expression]:
4019        if (
4020            (any_token and self._advance_any())
4021            or self._match(TokenType.VAR)
4022            or (self._match_set(tokens) if tokens else False)
4023        ):
4024            return self.expression(exp.Var, this=self._prev.text)
4025        return self._parse_placeholder()
4026
4027    def _advance_any(self) -> t.Optional[Token]:
4028        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4029            self._advance()
4030            return self._prev
4031        return None
4032
4033    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4034        return self._parse_var() or self._parse_string()
4035
4036    def _parse_null(self) -> t.Optional[exp.Expression]:
4037        if self._match(TokenType.NULL):
4038            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4039        return None
4040
4041    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4042        if self._match(TokenType.TRUE):
4043            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4044        if self._match(TokenType.FALSE):
4045            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4046        return None
4047
4048    def _parse_star(self) -> t.Optional[exp.Expression]:
4049        if self._match(TokenType.STAR):
4050            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4051        return None
4052
    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces, e.g. `@{name}`."""
        wrapped = self._match(TokenType.L_BRACE)
        # Try a var, then an identifier, then any primary; the first success wins.
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4058
4059    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4060        if self._match_set(self.PLACEHOLDER_PARSERS):
4061            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4062            if placeholder:
4063                return placeholder
4064            self._advance(-1)
4065        return None
4066
4067    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4068        if not self._match(TokenType.EXCEPT):
4069            return None
4070        if self._match(TokenType.L_PAREN, advance=False):
4071            return self._parse_wrapped_csv(self._parse_column)
4072        return self._parse_csv(self._parse_column)
4073
4074    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4075        if not self._match(TokenType.REPLACE):
4076            return None
4077        if self._match(TokenType.L_PAREN, advance=False):
4078            return self._parse_wrapped_csv(self._parse_expression)
4079        return self._parse_csv(self._parse_expression)
4080
4081    def _parse_csv(
4082        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4083    ) -> t.List[t.Optional[exp.Expression]]:
4084        parse_result = parse_method()
4085        items = [parse_result] if parse_result is not None else []
4086
4087        while self._match(sep):
4088            self._add_comments(parse_result)
4089            parse_result = parse_method()
4090            if parse_result is not None:
4091                items.append(parse_result)
4092
4093        return items
4094
4095    def _parse_tokens(
4096        self, parse_method: t.Callable, expressions: t.Dict
4097    ) -> t.Optional[exp.Expression]:
4098        this = parse_method()
4099
4100        while self._match_set(expressions):
4101            this = self.expression(
4102                expressions[self._prev.token_type],
4103                this=this,
4104                comments=self._prev_comments,
4105                expression=parse_method(),
4106            )
4107
4108        return this
4109
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4112
4113    def _parse_wrapped_csv(
4114        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4115    ) -> t.List[t.Optional[exp.Expression]]:
4116        return self._parse_wrapped(
4117            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4118        )
4119
4120    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4121        wrapped = self._match(TokenType.L_PAREN)
4122        if not wrapped and not optional:
4123            self.raise_error("Expecting (")
4124        parse_result = parse_method()
4125        if wrapped:
4126            self._match_r_paren()
4127        return parse_result
4128
4129    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4130        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4131
4132    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4133        return self._parse_set_operations(
4134            self._parse_select(nested=True, parse_subquery_alias=False)
4135        )
4136
4137    def _parse_transaction(self) -> exp.Expression:
4138        this = None
4139        if self._match_texts(self.TRANSACTION_KIND):
4140            this = self._prev.text
4141
4142        self._match_texts({"TRANSACTION", "WORK"})
4143
4144        modes = []
4145        while True:
4146            mode = []
4147            while self._match(TokenType.VAR):
4148                mode.append(self._prev.text)
4149
4150            if mode:
4151                modes.append(" ".join(mode))
4152            if not self._match(TokenType.COMMA):
4153                break
4154
4155        return self.expression(exp.Transaction, this=this, modes=modes)
4156
4157    def _parse_commit_or_rollback(self) -> exp.Expression:
4158        chain = None
4159        savepoint = None
4160        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4161
4162        self._match_texts({"TRANSACTION", "WORK"})
4163
4164        if self._match_text_seq("TO"):
4165            self._match_text_seq("SAVEPOINT")
4166            savepoint = self._parse_id_var()
4167
4168        if self._match(TokenType.AND):
4169            chain = not self._match_text_seq("NO")
4170            self._match_text_seq("CHAIN")
4171
4172        if is_rollback:
4173            return self.expression(exp.Rollback, savepoint=savepoint)
4174        return self.expression(exp.Commit, chain=chain)
4175
4176    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4177        if not self._match_text_seq("ADD"):
4178            return None
4179
4180        self._match(TokenType.COLUMN)
4181        exists_column = self._parse_exists(not_=True)
4182        expression = self._parse_column_def(self._parse_field(any_token=True))
4183
4184        if expression:
4185            expression.set("exists", exists_column)
4186
4187            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4188            if self._match_texts(("FIRST", "AFTER")):
4189                position = self._prev.text
4190                column_position = self.expression(
4191                    exp.ColumnPosition, this=self._parse_column(), position=position
4192                )
4193                expression.set("position", column_position)
4194
4195        return expression
4196
4197    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4198        drop = self._match(TokenType.DROP) and self._parse_drop()
4199        if drop and not isinstance(drop, exp.Command):
4200            drop.set("kind", drop.args.get("kind", "COLUMN"))
4201        return drop
4202
4203    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4204    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4205        return self.expression(
4206            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4207        )
4208
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY alter action.

        The token that routed here is in `self._prev`; its type decides which
        constraint form follows.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (<condition>) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # Either a bare FOREIGN/PRIMARY KEY, or one following `CONSTRAINT <name>`.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4232
4233    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4234        index = self._index - 1
4235
4236        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4237            return self._parse_csv(self._parse_add_constraint)
4238
4239        self._retreat(index)
4240        return self._parse_csv(self._parse_add_column)
4241
4242    def _parse_alter_table_alter(self) -> exp.Expression:
4243        self._match(TokenType.COLUMN)
4244        column = self._parse_field(any_token=True)
4245
4246        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4247            return self.expression(exp.AlterColumn, this=column, drop=True)
4248        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4249            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4250
4251        self._match_text_seq("SET", "DATA")
4252        return self.expression(
4253            exp.AlterColumn,
4254            this=column,
4255            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4256            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4257            using=self._match(TokenType.USING) and self._parse_conjunction(),
4258        )
4259
4260    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4261        index = self._index - 1
4262
4263        partition_exists = self._parse_exists()
4264        if self._match(TokenType.PARTITION, advance=False):
4265            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4266
4267        self._retreat(index)
4268        return self._parse_csv(self._parse_drop_column)
4269
4270    def _parse_alter_table_rename(self) -> exp.Expression:
4271        self._match_text_seq("TO")
4272        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4273
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement, falling back to a generic Command.

        The fallback fires when the target is not a table, the action keyword is
        not registered in ALTER_PARSERS, or tokens remain after the actions parse.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The word after the table name (e.g. ADD, DROP, RENAME) selects the action parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the parse if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4298
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: target, source, join condition, and WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <condition>] THEN <action>
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False if neither is given.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * or INSERT (<columns>) VALUES (<values>)
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * or UPDATE SET <col> = <expr>, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE has no arguments; keep it as a bare Var.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4364
4365    def _parse_show(self) -> t.Optional[exp.Expression]:
4366        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4367        if parser:
4368            return parser(self)
4369        self._advance()
4370        return self.expression(exp.Show, this=self._prev.text.upper())
4371
4372    def _parse_set_item_assignment(
4373        self, kind: t.Optional[str] = None
4374    ) -> t.Optional[exp.Expression]:
4375        index = self._index
4376
4377        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4378            return self._parse_set_transaction(global_=kind == "GLOBAL")
4379
4380        left = self._parse_primary() or self._parse_id_var()
4381
4382        if not self._match_texts(("=", "TO")):
4383            self._retreat(index)
4384            return None
4385
4386        right = self._parse_statement() or self._parse_id_var()
4387        this = self.expression(
4388            exp.EQ,
4389            this=left,
4390            expression=right,
4391        )
4392
4393        return self.expression(
4394            exp.SetItem,
4395            this=this,
4396            kind=kind,
4397        )
4398
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION followed by its characteristics."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it can't be passed as a bare kwarg.
            **{"global": global_},  # type: ignore
        )
4410
4411    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4412        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4413        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4414
4415    def _parse_set(self) -> exp.Expression:
4416        index = self._index
4417        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4418
4419        if self._curr:
4420            self._retreat(index)
4421            return self._parse_as_command(self._prev)
4422
4423        return set_
4424
4425    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4426        for option in options:
4427            if self._match_text_seq(*option.split(" ")):
4428                return exp.Var(this=option)
4429        return None
4430
4431    def _parse_as_command(self, start: Token) -> exp.Command:
4432        while self._curr:
4433            self._advance()
4434        text = self._find_sql(start, self._prev)
4435        size = len(start.text)
4436        return exp.Command(this=text[:size], expression=text[size:])
4437
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over the upcoming tokens to find a registered multi-word parser.

        Returns the matching callable from `parsers`, or None (with the cursor
        restored) if no registered key prefixes the token stream.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no key starts with the words consumed so far.
                break
            if result == 2:
                # Full key matched; look up its parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4460
4461    def _match(self, token_type, advance=True, expression=None):
4462        if not self._curr:
4463            return None
4464
4465        if self._curr.token_type == token_type:
4466            if advance:
4467                self._advance()
4468            self._add_comments(expression)
4469            return True
4470
4471        return None
4472
4473    def _match_set(self, types, advance=True):
4474        if not self._curr:
4475            return None
4476
4477        if self._curr.token_type in types:
4478            if advance:
4479                self._advance()
4480            return True
4481
4482        return None
4483
4484    def _match_pair(self, token_type_a, token_type_b, advance=True):
4485        if not self._curr or not self._next:
4486            return None
4487
4488        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4489            if advance:
4490                self._advance(2)
4491            return True
4492
4493        return None
4494
    def _match_l_paren(self, expression=None):
        """Require an opening parenthesis, raising a parse error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
4498
    def _match_r_paren(self, expression=None):
        """Require a closing parenthesis, raising a parse error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
4502
4503    def _match_texts(self, texts, advance=True):
4504        if self._curr and self._curr.text.upper() in texts:
4505            if advance:
4506                self._advance()
4507            return True
4508        return False
4509
4510    def _match_text_seq(self, *texts, advance=True):
4511        index = self._index
4512        for text in texts:
4513            if self._curr and self._curr.text.upper() == text:
4514                self._advance()
4515            else:
4516                self._retreat(index)
4517                return False
4518
4519        if not advance:
4520            self._retreat(index)
4521
4522        return True
4523
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where a dotted name should not be treated as a column reference.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # `tbl.col` becomes Dot(tbl, col); a bare column becomes Var(name).
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4538
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite Column references to lambda parameters inside a lambda body.

        Args:
            node: the lambda body expression to rewrite.
            lambda_variables: the parameter names bound by the enclosing lambda.

        Returns:
            The rewritten node (a new root if the whole node was replaced).
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Climb to the outermost Dot of the chain before replacing.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column in place, or swap the root.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
811    def __init__(
812        self,
813        error_level: t.Optional[ErrorLevel] = None,
814        error_message_context: int = 100,
815        index_offset: int = 0,
816        unnest_column_only: bool = False,
817        alias_post_tablesample: bool = False,
818        max_errors: int = 3,
819        null_ordering: t.Optional[str] = None,
820    ):
821        self.error_level = error_level or ErrorLevel.IMMEDIATE
822        self.error_message_context = error_message_context
823        self.index_offset = index_offset
824        self.unnest_column_only = unnest_column_only
825        self.alias_post_tablesample = alias_post_tablesample
826        self.max_errors = max_errors
827        self.null_ordering = null_ordering
828        self.reset()
def reset(self):
830    def reset(self):
831        self.sql = ""
832        self.errors = []
833        self._tokens = []
834        self._index = 0
835        self._curr = None
836        self._next = None
837        self._prev = None
838        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
840    def parse(
841        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
842    ) -> t.List[t.Optional[exp.Expression]]:
843        """
844        Parses a list of tokens and returns a list of syntax trees, one tree
845        per parsed SQL statement.
846
847        Args:
848            raw_tokens: the list of tokens.
849            sql: the original SQL string, used to produce helpful debug messages.
850
851        Returns:
852            The list of syntax trees.
853        """
854        return self._parse(
855            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
856        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
858    def parse_into(
859        self,
860        expression_types: exp.IntoType,
861        raw_tokens: t.List[Token],
862        sql: t.Optional[str] = None,
863    ) -> t.List[t.Optional[exp.Expression]]:
864        """
865        Parses a list of tokens into a given Expression type. If a collection of Expression
866        types is given instead, this method will try to parse the token list into each one
867        of them, stopping at the first for which the parsing succeeds.
868
869        Args:
870            expression_types: the expression type(s) to try and parse the token list into.
871            raw_tokens: the list of tokens.
872            sql: the original SQL string, used to produce helpful debug messages.
873
874        Returns:
875            The target Expression.
876        """
877        errors = []
878        for expression_type in ensure_collection(expression_types):
879            parser = self.EXPRESSION_PARSERS.get(expression_type)
880            if not parser:
881                raise TypeError(f"No parser registered for {expression_type}")
882            try:
883                return self._parse(parser, raw_tokens, sql)
884            except ParseError as e:
885                e.errors[0]["into_expression"] = expression_type
886                errors.append(e)
887        raise ParseError(
888            f"Failed to parse into {expression_types}",
889            errors=merge_errors(errors),
890        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
926    def check_errors(self) -> None:
927        """
928        Logs or raises any found errors, depending on the chosen error level setting.
929        """
930        if self.error_level == ErrorLevel.WARN:
931            for error in self.errors:
932                logger.error(str(error))
933        elif self.error_level == ErrorLevel.RAISE and self.errors:
934            raise ParseError(
935                concat_messages(self.errors, self.max_errors),
936                errors=merge_errors(self.errors),
937            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
939    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
940        """
941        Appends an error in the list of recorded errors or raises it, depending on the chosen
942        error level setting.
943        """
944        token = token or self._curr or self._prev or Token.string("")
945        start = token.start
946        end = token.end + 1
947        start_context = self.sql[max(start - self.error_message_context, 0) : start]
948        highlight = self.sql[start:end]
949        end_context = self.sql[end : end + self.error_message_context]
950
951        error = ParseError.new(
952            f"{message}. Line {token.line}, Col: {token.col}.\n"
953            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
954            description=message,
955            line=token.line,
956            col=token.col,
957            start_context=start_context,
958            highlight=highlight,
959            end_context=end_context,
960        )
961
962        if self.error_level == ErrorLevel.IMMEDIATE:
963            raise error
964
965        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression(self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
967    def expression(
968        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
969    ) -> E:
970        """
971        Creates a new, validated Expression.
972
973        Args:
974            exp_class: the expression class to instantiate.
975            comments: an optional list of comments to attach to the expression.
976            kwargs: the arguments to set for the expression along with their respective values.
977
978        Returns:
979            The target expression.
980        """
981        instance = exp_class(**kwargs)
982        instance.add_comments(comments) if comments else self._add_comments(instance)
983        self.validate_expression(instance)
984        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
 991    def validate_expression(
 992        self, expression: exp.Expression, args: t.Optional[t.List] = None
 993    ) -> None:
 994        """
 995        Validates an already instantiated expression, making sure that all its mandatory arguments
 996        are set.
 997
 998        Args:
 999            expression: the expression to validate.
1000            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1001        """
1002        if self.error_level == ErrorLevel.IGNORE:
1003            return
1004
1005        for error_message in expression.error_messages(args):
1006            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.