sqlglot.parser — module source listing

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
# Module-level logger used for parser diagnostics.
logger = logging.getLogger("sqlglot")

# Generic type variable bound to exp.Expression, for typed helpers.
E = t.TypeVar("E", bound=exp.Expression)
  16
  17
  18def parse_var_map(args: t.Sequence) -> exp.Expression:
  19    if len(args) == 1 and args[0].is_star:
  20        return exp.StarMap(this=args[0])
  21
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33def parse_like(args):
  34    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  35    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  36
  37
  38def binary_range_parser(
  39    expr_type: t.Type[exp.Expression],
  40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  41    return lambda self, this: self._parse_escape(
  42        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  43    )
  44
  45
  46class _Parser(type):
  47    def __new__(cls, clsname, bases, attrs):
  48        klass = super().__new__(cls, clsname, bases, attrs)
  49        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  50        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  51
  52        return klass
  53
  54
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  77
    # Function-name -> builder for functions parsed with parentheses. Seeded
    # with every function known to sqlglot.exp, then extended/overridden with
    # a handful of special cases.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Operand order is swapped: args[0] is the pattern, args[1] the value.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # Cast to TEXT, then keep the first 10 characters (the date part).
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }
 101
    # Tokens parsed as functions without parentheses, e.g. CURRENT_DATE.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate — confirm this is
    # intentional rather than a missing CurrentDatetime expression.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Join hint names; empty here, dialect subclasses may populate it.
    JOIN_HINTS: t.Set[str] = set()
 111
    # Type tokens that can wrap other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    # All tokens that can start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }
 180
    # Tokens introducing subquery predicates; SOME is an alias of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that can never be used as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Everything a CREATE statement may create.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
 204
    # Tokens that, despite being keywords, may still be parsed as identifiers.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens allowed inside INTERVAL expressions (END would be
    # ambiguous with CASE ... END).
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens allowed as table aliases; the removed tokens would be
    # ambiguous with clauses that can follow a table expression.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # SET must not be taken as an alias in UPDATE ... SET.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Positional specifiers accepted by TRIM(...).
    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 302
    # Tokens that may appear in function-call position.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Operator tables: token -> expression class, grouped by precedence tier.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-like type tokens used when parsing temporal casts/literals.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }
 404
    # Lambda syntaxes: `args -> body` builds a Lambda; `arg => value` builds a
    # keyword argument (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators on a column expression. DOT maps to None because it is
    # presumably handled inline by the column parser — confirm against
    # _parse_column.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        # `x::type` cast; strictness depends on the dialect's STRICT_CAST.
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        # NOTE(review): PLACEHOLDER (`?`) builds JSONBContains — presumably the
        # Postgres `?` containment operator; confirm.
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 454
    # Expression class -> parse entry point, used to parse a fragment into a
    # specific expression kind.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement token -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        # USE [ROLE|WAREHOUSE|DATABASE|SCHEMA] <name>; the kind is optional.
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 511
    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary token -> builder; each receives the consumed token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        # `*` with optional EXCEPT(...) / REPLACE(...) modifiers.
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes (`?`, `@param`, `:1` / `:name`).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators that follow a parsed expression.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 561
    # Keyword (already consumed) -> parser for a CREATE/ALTER property. Several
    # entries inspect self._prev.text because modifiers like NO/DUAL/DEFAULT
    # are consumed before dispatch.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
 646
    # Keyword (already consumed) -> parser for a column/table constraint.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function> — only matches when UPDATE follows ON.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 700
    # Token -> parser for function-like constructs without parentheses
    # (CASE, IF, ANY, NEXT VALUE FOR).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Function names whose arguments need bespoke parsing (special syntax
    # inside the parentheses, e.g. CAST(x AS type)).
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier slot -> parser; joins/laterals collect repeatedly until
    # their parser returns None (via iter's sentinel form).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes/kinds.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialect subclasses may populate these.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 757
 758    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 759
 760    TRANSACTION_CHARACTERISTICS = {
 761        "ISOLATION LEVEL REPEATABLE READ",
 762        "ISOLATION LEVEL READ COMMITTED",
 763        "ISOLATION LEVEL READ UNCOMMITTED",
 764        "ISOLATION LEVEL SERIALIZABLE",
 765        "READ WRITE",
 766        "READ ONLY",
 767    }
 768
 769    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 770
 771    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 772
 773    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 774    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 775
 776    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 777
 778    STRICT_CAST = True
 779
 780    CONVERT_TYPE_FIRST = False
 781
 782    PREFIXED_PIVOT_COLUMNS = False
 783    IDENTIFY_PIVOT_STRINGS = False
 784
 785    LOG_BASE_FIRST = True
 786    LOG_DEFAULTS_TO_LN = False
 787
 788    __slots__ = (
 789        "error_level",
 790        "error_message_context",
 791        "sql",
 792        "errors",
 793        "index_offset",
 794        "unnest_column_only",
 795        "alias_post_tablesample",
 796        "max_errors",
 797        "null_ordering",
 798        "_tokens",
 799        "_index",
 800        "_curr",
 801        "_next",
 802        "_prev",
 803        "_prev_comments",
 804        "_show_trie",
 805        "_set_trie",
 806    )
 807
 808    def __init__(
 809        self,
 810        error_level: t.Optional[ErrorLevel] = None,
 811        error_message_context: int = 100,
 812        index_offset: int = 0,
 813        unnest_column_only: bool = False,
 814        alias_post_tablesample: bool = False,
 815        max_errors: int = 3,
 816        null_ordering: t.Optional[str] = None,
 817    ):
 818        self.error_level = error_level or ErrorLevel.IMMEDIATE
 819        self.error_message_context = error_message_context
 820        self.index_offset = index_offset
 821        self.unnest_column_only = unnest_column_only
 822        self.alias_post_tablesample = alias_post_tablesample
 823        self.max_errors = max_errors
 824        self.null_ordering = null_ordering
 825        self.reset()
 826
 827    def reset(self):
 828        self.sql = ""
 829        self.errors = []
 830        self._tokens = []
 831        self._index = 0
 832        self._curr = None
 833        self._next = None
 834        self._prev = None
 835        self._prev_comments = None
 836
 837    def parse(
 838        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 839    ) -> t.List[t.Optional[exp.Expression]]:
 840        """
 841        Parses a list of tokens and returns a list of syntax trees, one tree
 842        per parsed SQL statement.
 843
 844        Args:
 845            raw_tokens: the list of tokens.
 846            sql: the original SQL string, used to produce helpful debug messages.
 847
 848        Returns:
 849            The list of syntax trees.
 850        """
 851        return self._parse(
 852            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 853        )
 854
 855    def parse_into(
 856        self,
 857        expression_types: exp.IntoType,
 858        raw_tokens: t.List[Token],
 859        sql: t.Optional[str] = None,
 860    ) -> t.List[t.Optional[exp.Expression]]:
 861        """
 862        Parses a list of tokens into a given Expression type. If a collection of Expression
 863        types is given instead, this method will try to parse the token list into each one
 864        of them, stopping at the first for which the parsing succeeds.
 865
 866        Args:
 867            expression_types: the expression type(s) to try and parse the token list into.
 868            raw_tokens: the list of tokens.
 869            sql: the original SQL string, used to produce helpful debug messages.
 870
 871        Returns:
 872            The target Expression.
 873        """
 874        errors = []
 875        for expression_type in ensure_collection(expression_types):
 876            parser = self.EXPRESSION_PARSERS.get(expression_type)
 877            if not parser:
 878                raise TypeError(f"No parser registered for {expression_type}")
 879            try:
 880                return self._parse(parser, raw_tokens, sql)
 881            except ParseError as e:
 882                e.errors[0]["into_expression"] = expression_type
 883                errors.append(e)
 884        raise ParseError(
 885            f"Failed to parse into {expression_types}",
 886            errors=merge_errors(errors),
 887        ) from errors[-1]
 888
 889    def _parse(
 890        self,
 891        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 892        raw_tokens: t.List[Token],
 893        sql: t.Optional[str] = None,
 894    ) -> t.List[t.Optional[exp.Expression]]:
 895        self.reset()
 896        self.sql = sql or ""
 897        total = len(raw_tokens)
 898        chunks: t.List[t.List[Token]] = [[]]
 899
 900        for i, token in enumerate(raw_tokens):
 901            if token.token_type == TokenType.SEMICOLON:
 902                if i < total - 1:
 903                    chunks.append([])
 904            else:
 905                chunks[-1].append(token)
 906
 907        expressions = []
 908
 909        for tokens in chunks:
 910            self._index = -1
 911            self._tokens = tokens
 912            self._advance()
 913
 914            expressions.append(parse_method(self))
 915
 916            if self._index < len(self._tokens):
 917                self.raise_error("Invalid expression / Unexpected token")
 918
 919            self.check_errors()
 920
 921        return expressions
 922
 923    def check_errors(self) -> None:
 924        """
 925        Logs or raises any found errors, depending on the chosen error level setting.
 926        """
 927        if self.error_level == ErrorLevel.WARN:
 928            for error in self.errors:
 929                logger.error(str(error))
 930        elif self.error_level == ErrorLevel.RAISE and self.errors:
 931            raise ParseError(
 932                concat_messages(self.errors, self.max_errors),
 933                errors=merge_errors(self.errors),
 934            )
 935
 936    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 937        """
 938        Appends an error in the list of recorded errors or raises it, depending on the chosen
 939        error level setting.
 940        """
 941        token = token or self._curr or self._prev or Token.string("")
 942        start = token.start
 943        end = token.end
 944        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 945        highlight = self.sql[start:end]
 946        end_context = self.sql[end : end + self.error_message_context]
 947
 948        error = ParseError.new(
 949            f"{message}. Line {token.line}, Col: {token.col}.\n"
 950            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 951            description=message,
 952            line=token.line,
 953            col=token.col,
 954            start_context=start_context,
 955            highlight=highlight,
 956            end_context=end_context,
 957        )
 958
 959        if self.error_level == ErrorLevel.IMMEDIATE:
 960            raise error
 961
 962        self.errors.append(error)
 963
 964    def expression(
 965        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 966    ) -> E:
 967        """
 968        Creates a new, validated Expression.
 969
 970        Args:
 971            exp_class: the expression class to instantiate.
 972            comments: an optional list of comments to attach to the expression.
 973            kwargs: the arguments to set for the expression along with their respective values.
 974
 975        Returns:
 976            The target expression.
 977        """
 978        instance = exp_class(**kwargs)
 979        instance.add_comments(comments) if comments else self._add_comments(instance)
 980        self.validate_expression(instance)
 981        return instance
 982
 983    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 984        if expression and self._prev_comments:
 985            expression.add_comments(self._prev_comments)
 986            self._prev_comments = None
 987
 988    def validate_expression(
 989        self, expression: exp.Expression, args: t.Optional[t.List] = None
 990    ) -> None:
 991        """
 992        Validates an already instantiated expression, making sure that all its mandatory arguments
 993        are set.
 994
 995        Args:
 996            expression: the expression to validate.
 997            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 998        """
 999        if self.error_level == ErrorLevel.IGNORE:
1000            return
1001
1002        for error_message in expression.error_messages(args):
1003            self.raise_error(error_message)
1004
1005    def _find_sql(self, start: Token, end: Token) -> str:
1006        return self.sql[start.start : end.end]
1007
1008    def _advance(self, times: int = 1) -> None:
1009        self._index += times
1010        self._curr = seq_get(self._tokens, self._index)
1011        self._next = seq_get(self._tokens, self._index + 1)
1012        if self._index > 0:
1013            self._prev = self._tokens[self._index - 1]
1014            self._prev_comments = self._prev.comments
1015        else:
1016            self._prev = None
1017            self._prev_comments = None
1018
1019    def _retreat(self, index: int) -> None:
1020        if index != self._index:
1021            self._advance(index - self._index)
1022
1023    def _parse_command(self) -> exp.Command:
1024        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1025
1026    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1027        start = self._prev
1028        exists = self._parse_exists() if allow_exists else None
1029
1030        self._match(TokenType.ON)
1031
1032        kind = self._match_set(self.CREATABLES) and self._prev
1033
1034        if not kind:
1035            return self._parse_as_command(start)
1036
1037        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1038            this = self._parse_user_defined_function(kind=kind.token_type)
1039        elif kind.token_type == TokenType.TABLE:
1040            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1041        elif kind.token_type == TokenType.COLUMN:
1042            this = self._parse_column()
1043        else:
1044            this = self._parse_id_var()
1045
1046        self._match(TokenType.IS)
1047
1048        return self.expression(
1049            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1050        )
1051
1052    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1053    def _parse_ttl(self) -> exp.Expression:
1054        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1055            this = self._parse_bitwise()
1056
1057            if self._match_text_seq("DELETE"):
1058                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1059            if self._match_text_seq("RECOMPRESS"):
1060                return self.expression(
1061                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1062                )
1063            if self._match_text_seq("TO", "DISK"):
1064                return self.expression(
1065                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1066                )
1067            if self._match_text_seq("TO", "VOLUME"):
1068                return self.expression(
1069                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1070                )
1071
1072            return this
1073
1074        expressions = self._parse_csv(_parse_ttl_action)
1075        where = self._parse_where()
1076        group = self._parse_group()
1077
1078        aggregates = None
1079        if group and self._match(TokenType.SET):
1080            aggregates = self._parse_csv(self._parse_set_item)
1081
1082        return self.expression(
1083            exp.MergeTreeTTL,
1084            expressions=expressions,
1085            where=where,
1086            group=group,
1087            aggregates=aggregates,
1088        )
1089
1090    def _parse_statement(self) -> t.Optional[exp.Expression]:
1091        if self._curr is None:
1092            return None
1093
1094        if self._match_set(self.STATEMENT_PARSERS):
1095            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1096
1097        if self._match_set(Tokenizer.COMMANDS):
1098            return self._parse_command()
1099
1100        expression = self._parse_expression()
1101        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1102        return self._parse_query_modifiers(expression)
1103
1104    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1105        start = self._prev
1106        temporary = self._match(TokenType.TEMPORARY)
1107        materialized = self._match(TokenType.MATERIALIZED)
1108        kind = self._match_set(self.CREATABLES) and self._prev.text
1109        if not kind:
1110            return self._parse_as_command(start)
1111
1112        return self.expression(
1113            exp.Drop,
1114            exists=self._parse_exists(),
1115            this=self._parse_table(schema=True),
1116            kind=kind,
1117            temporary=temporary,
1118            materialized=materialized,
1119            cascade=self._match(TokenType.CASCADE),
1120            constraints=self._match_text_seq("CONSTRAINTS"),
1121            purge=self._match_text_seq("PURGE"),
1122        )
1123
1124    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1125        return (
1126            self._match(TokenType.IF)
1127            and (not not_ or self._match(TokenType.NOT))
1128            and self._match(TokenType.EXISTS)
1129        )
1130
1131    def _parse_create(self) -> t.Optional[exp.Expression]:
1132        start = self._prev
1133        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1134            TokenType.OR, TokenType.REPLACE
1135        )
1136        unique = self._match(TokenType.UNIQUE)
1137
1138        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1139            self._match(TokenType.TABLE)
1140
1141        properties = None
1142        create_token = self._match_set(self.CREATABLES) and self._prev
1143
1144        if not create_token:
1145            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1146            create_token = self._match_set(self.CREATABLES) and self._prev
1147
1148            if not properties or not create_token:
1149                return self._parse_as_command(start)
1150
1151        exists = self._parse_exists(not_=True)
1152        this = None
1153        expression = None
1154        indexes = None
1155        no_schema_binding = None
1156        begin = None
1157        clone = None
1158
1159        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1160            this = self._parse_user_defined_function(kind=create_token.token_type)
1161            temp_properties = self._parse_properties()
1162            if properties and temp_properties:
1163                properties.expressions.extend(temp_properties.expressions)
1164            elif temp_properties:
1165                properties = temp_properties
1166
1167            self._match(TokenType.ALIAS)
1168            begin = self._match(TokenType.BEGIN)
1169            return_ = self._match_text_seq("RETURN")
1170            expression = self._parse_statement()
1171
1172            if return_:
1173                expression = self.expression(exp.Return, this=expression)
1174        elif create_token.token_type == TokenType.INDEX:
1175            this = self._parse_index()
1176        elif create_token.token_type in self.DB_CREATABLES:
1177            table_parts = self._parse_table_parts(schema=True)
1178
1179            # exp.Properties.Location.POST_NAME
1180            if self._match(TokenType.COMMA):
1181                temp_properties = self._parse_properties(before=True)
1182                if properties and temp_properties:
1183                    properties.expressions.extend(temp_properties.expressions)
1184                elif temp_properties:
1185                    properties = temp_properties
1186
1187            this = self._parse_schema(this=table_parts)
1188
1189            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1190            temp_properties = self._parse_properties()
1191            if properties and temp_properties:
1192                properties.expressions.extend(temp_properties.expressions)
1193            elif temp_properties:
1194                properties = temp_properties
1195
1196            self._match(TokenType.ALIAS)
1197
1198            # exp.Properties.Location.POST_ALIAS
1199            if not (
1200                self._match(TokenType.SELECT, advance=False)
1201                or self._match(TokenType.WITH, advance=False)
1202                or self._match(TokenType.L_PAREN, advance=False)
1203            ):
1204                temp_properties = self._parse_properties()
1205                if properties and temp_properties:
1206                    properties.expressions.extend(temp_properties.expressions)
1207                elif temp_properties:
1208                    properties = temp_properties
1209
1210            expression = self._parse_ddl_select()
1211
1212            if create_token.token_type == TokenType.TABLE:
1213                # exp.Properties.Location.POST_EXPRESSION
1214                temp_properties = self._parse_properties()
1215                if properties and temp_properties:
1216                    properties.expressions.extend(temp_properties.expressions)
1217                elif temp_properties:
1218                    properties = temp_properties
1219
1220                indexes = []
1221                while True:
1222                    index = self._parse_create_table_index()
1223
1224                    # exp.Properties.Location.POST_INDEX
1225                    if self._match(TokenType.PARTITION_BY, advance=False):
1226                        temp_properties = self._parse_properties()
1227                        if properties and temp_properties:
1228                            properties.expressions.extend(temp_properties.expressions)
1229                        elif temp_properties:
1230                            properties = temp_properties
1231
1232                    if not index:
1233                        break
1234                    else:
1235                        indexes.append(index)
1236            elif create_token.token_type == TokenType.VIEW:
1237                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1238                    no_schema_binding = True
1239
1240            if self._match_text_seq("CLONE"):
1241                clone = self._parse_table(schema=True)
1242                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1243                clone_kind = (
1244                    self._match(TokenType.L_PAREN)
1245                    and self._match_texts(self.CLONE_KINDS)
1246                    and self._prev.text.upper()
1247                )
1248                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1249                self._match(TokenType.R_PAREN)
1250                clone = self.expression(
1251                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1252                )
1253
1254        return self.expression(
1255            exp.Create,
1256            this=this,
1257            kind=create_token.text,
1258            replace=replace,
1259            unique=unique,
1260            expression=expression,
1261            exists=exists,
1262            properties=properties,
1263            indexes=indexes,
1264            no_schema_binding=no_schema_binding,
1265            begin=begin,
1266            clone=clone,
1267        )
1268
1269    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1270        self._match(TokenType.COMMA)
1271
1272        # parsers look to _prev for no/dual/default, so need to consume first
1273        self._match_text_seq("NO")
1274        self._match_text_seq("DUAL")
1275        self._match_text_seq("DEFAULT")
1276
1277        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1278            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1279
1280        return None
1281
1282    def _parse_property(self) -> t.Optional[exp.Expression]:
1283        if self._match_texts(self.PROPERTY_PARSERS):
1284            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1285
1286        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1287            return self._parse_character_set(default=True)
1288
1289        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1290            return self._parse_sortkey(compound=True)
1291
1292        if self._match_text_seq("SQL", "SECURITY"):
1293            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1294
1295        assignment = self._match_pair(
1296            TokenType.VAR, TokenType.EQ, advance=False
1297        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1298
1299        if assignment:
1300            key = self._parse_var_or_string()
1301            self._match(TokenType.EQ)
1302            return self.expression(exp.Property, this=key, value=self._parse_column())
1303
1304        return None
1305
1306    def _parse_stored(self) -> exp.Expression:
1307        self._match(TokenType.ALIAS)
1308
1309        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1310        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1311
1312        return self.expression(
1313            exp.FileFormatProperty,
1314            this=self.expression(
1315                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1316            )
1317            if input_format or output_format
1318            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1319        )
1320
1321    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1322        self._match(TokenType.EQ)
1323        self._match(TokenType.ALIAS)
1324        return self.expression(exp_class, this=self._parse_field())
1325
1326    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1327        properties = []
1328
1329        while True:
1330            if before:
1331                identified_property = self._parse_property_before()
1332            else:
1333                identified_property = self._parse_property()
1334
1335            if not identified_property:
1336                break
1337            for p in ensure_list(identified_property):
1338                properties.append(p)
1339
1340        if properties:
1341            return self.expression(exp.Properties, expressions=properties)
1342
1343        return None
1344
1345    def _parse_fallback(self, no=False) -> exp.Expression:
1346        self._match_text_seq("FALLBACK")
1347        return self.expression(
1348            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1349        )
1350
1351    def _parse_volatile_property(self) -> exp.Expression:
1352        if self._index >= 2:
1353            pre_volatile_token = self._tokens[self._index - 2]
1354        else:
1355            pre_volatile_token = None
1356
1357        if pre_volatile_token and pre_volatile_token.token_type in (
1358            TokenType.CREATE,
1359            TokenType.REPLACE,
1360            TokenType.UNIQUE,
1361        ):
1362            return exp.VolatileProperty()
1363
1364        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1365
1366    def _parse_with_property(
1367        self,
1368    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1369        self._match(TokenType.WITH)
1370        if self._match(TokenType.L_PAREN, advance=False):
1371            return self._parse_wrapped_csv(self._parse_property)
1372
1373        if self._match_text_seq("JOURNAL"):
1374            return self._parse_withjournaltable()
1375
1376        if self._match_text_seq("DATA"):
1377            return self._parse_withdata(no=False)
1378        elif self._match_text_seq("NO", "DATA"):
1379            return self._parse_withdata(no=True)
1380
1381        if not self._next:
1382            return None
1383
1384        return self._parse_withisolatedloading()
1385
1386    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1387    def _parse_definer(self) -> t.Optional[exp.Expression]:
1388        self._match(TokenType.EQ)
1389
1390        user = self._parse_id_var()
1391        self._match(TokenType.PARAMETER)
1392        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1393
1394        if not user or not host:
1395            return None
1396
1397        return exp.DefinerProperty(this=f"{user}@{host}")
1398
1399    def _parse_withjournaltable(self) -> exp.Expression:
1400        self._match(TokenType.TABLE)
1401        self._match(TokenType.EQ)
1402        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1403
1404    def _parse_log(self, no=False) -> exp.Expression:
1405        self._match_text_seq("LOG")
1406        return self.expression(exp.LogProperty, no=no)
1407
1408    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1409        before = self._match_text_seq("BEFORE")
1410        self._match_text_seq("JOURNAL")
1411        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1412
1413    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1414        self._match_text_seq("NOT")
1415        self._match_text_seq("LOCAL")
1416        self._match_text_seq("AFTER", "JOURNAL")
1417        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1418
1419    def _parse_checksum(self) -> exp.Expression:
1420        self._match_text_seq("CHECKSUM")
1421        self._match(TokenType.EQ)
1422
1423        on = None
1424        if self._match(TokenType.ON):
1425            on = True
1426        elif self._match_text_seq("OFF"):
1427            on = False
1428        default = self._match(TokenType.DEFAULT)
1429
1430        return self.expression(
1431            exp.ChecksumProperty,
1432            on=on,
1433            default=default,
1434        )
1435
1436    def _parse_freespace(self) -> exp.Expression:
1437        self._match_text_seq("FREESPACE")
1438        self._match(TokenType.EQ)
1439        return self.expression(
1440            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1441        )
1442
1443    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1444        self._match_text_seq("MERGEBLOCKRATIO")
1445        if self._match(TokenType.EQ):
1446            return self.expression(
1447                exp.MergeBlockRatioProperty,
1448                this=self._parse_number(),
1449                percent=self._match(TokenType.PERCENT),
1450            )
1451        else:
1452            return self.expression(
1453                exp.MergeBlockRatioProperty,
1454                no=no,
1455                default=default,
1456            )
1457
1458    def _parse_datablocksize(self, default=None) -> exp.Expression:
1459        if default:
1460            self._match_text_seq("DATABLOCKSIZE")
1461            return self.expression(exp.DataBlocksizeProperty, default=True)
1462        elif self._match_texts(("MIN", "MINIMUM")):
1463            self._match_text_seq("DATABLOCKSIZE")
1464            return self.expression(exp.DataBlocksizeProperty, min=True)
1465        elif self._match_texts(("MAX", "MAXIMUM")):
1466            self._match_text_seq("DATABLOCKSIZE")
1467            return self.expression(exp.DataBlocksizeProperty, min=False)
1468
1469        self._match_text_seq("DATABLOCKSIZE")
1470        self._match(TokenType.EQ)
1471        size = self._parse_number()
1472        units = None
1473        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1474            units = self._prev.text
1475        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1476
1477    def _parse_blockcompression(self) -> exp.Expression:
1478        self._match_text_seq("BLOCKCOMPRESSION")
1479        self._match(TokenType.EQ)
1480        always = self._match_text_seq("ALWAYS")
1481        manual = self._match_text_seq("MANUAL")
1482        never = self._match_text_seq("NEVER")
1483        default = self._match_text_seq("DEFAULT")
1484        autotemp = None
1485        if self._match_text_seq("AUTOTEMP"):
1486            autotemp = self._parse_schema()
1487
1488        return self.expression(
1489            exp.BlockCompressionProperty,
1490            always=always,
1491            manual=manual,
1492            never=never,
1493            default=default,
1494            autotemp=autotemp,
1495        )
1496
1497    def _parse_withisolatedloading(self) -> exp.Expression:
1498        no = self._match_text_seq("NO")
1499        concurrent = self._match_text_seq("CONCURRENT")
1500        self._match_text_seq("ISOLATED", "LOADING")
1501        for_all = self._match_text_seq("FOR", "ALL")
1502        for_insert = self._match_text_seq("FOR", "INSERT")
1503        for_none = self._match_text_seq("FOR", "NONE")
1504        return self.expression(
1505            exp.IsolatedLoadingProperty,
1506            no=no,
1507            concurrent=concurrent,
1508            for_all=for_all,
1509            for_insert=for_insert,
1510            for_none=for_none,
1511        )
1512
1513    def _parse_locking(self) -> exp.Expression:
1514        if self._match(TokenType.TABLE):
1515            kind = "TABLE"
1516        elif self._match(TokenType.VIEW):
1517            kind = "VIEW"
1518        elif self._match(TokenType.ROW):
1519            kind = "ROW"
1520        elif self._match_text_seq("DATABASE"):
1521            kind = "DATABASE"
1522        else:
1523            kind = None
1524
1525        if kind in ("DATABASE", "TABLE", "VIEW"):
1526            this = self._parse_table_parts()
1527        else:
1528            this = None
1529
1530        if self._match(TokenType.FOR):
1531            for_or_in = "FOR"
1532        elif self._match(TokenType.IN):
1533            for_or_in = "IN"
1534        else:
1535            for_or_in = None
1536
1537        if self._match_text_seq("ACCESS"):
1538            lock_type = "ACCESS"
1539        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1540            lock_type = "EXCLUSIVE"
1541        elif self._match_text_seq("SHARE"):
1542            lock_type = "SHARE"
1543        elif self._match_text_seq("READ"):
1544            lock_type = "READ"
1545        elif self._match_text_seq("WRITE"):
1546            lock_type = "WRITE"
1547        elif self._match_text_seq("CHECKSUM"):
1548            lock_type = "CHECKSUM"
1549        else:
1550            lock_type = None
1551
1552        override = self._match_text_seq("OVERRIDE")
1553
1554        return self.expression(
1555            exp.LockingProperty,
1556            this=this,
1557            kind=kind,
1558            for_or_in=for_or_in,
1559            lock_type=lock_type,
1560            override=override,
1561        )
1562
1563    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1564        if self._match(TokenType.PARTITION_BY):
1565            return self._parse_csv(self._parse_conjunction)
1566        return []
1567
1568    def _parse_partitioned_by(self) -> exp.Expression:
1569        self._match(TokenType.EQ)
1570        return self.expression(
1571            exp.PartitionedByProperty,
1572            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1573        )
1574
1575    def _parse_withdata(self, no=False) -> exp.Expression:
1576        if self._match_text_seq("AND", "STATISTICS"):
1577            statistics = True
1578        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1579            statistics = False
1580        else:
1581            statistics = None
1582
1583        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1584
1585    def _parse_noprimaryindex(self) -> exp.Expression:
1586        self._match_text_seq("PRIMARY", "INDEX")
1587        return exp.NoPrimaryIndexProperty()
1588
1589    def _parse_oncommit(self) -> exp.Expression:
1590        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1591        return exp.OnCommitProperty()
1592
1593    def _parse_distkey(self) -> exp.Expression:
1594        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1595
1596    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1597        table = self._parse_table(schema=True)
1598        options = []
1599        while self._match_texts(("INCLUDING", "EXCLUDING")):
1600            this = self._prev.text.upper()
1601            id_var = self._parse_id_var()
1602
1603            if not id_var:
1604                return None
1605
1606            options.append(
1607                self.expression(
1608                    exp.Property,
1609                    this=this,
1610                    value=exp.Var(this=id_var.this.upper()),
1611                )
1612            )
1613        return self.expression(exp.LikeProperty, this=table, expressions=options)
1614
1615    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1616        return self.expression(
1617            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1618        )
1619
1620    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1621        self._match(TokenType.EQ)
1622        return self.expression(
1623            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1624        )
1625
1626    def _parse_returns(self) -> exp.Expression:
1627        value: t.Optional[exp.Expression]
1628        is_table = self._match(TokenType.TABLE)
1629
1630        if is_table:
1631            if self._match(TokenType.LT):
1632                value = self.expression(
1633                    exp.Schema,
1634                    this="TABLE",
1635                    expressions=self._parse_csv(self._parse_struct_types),
1636                )
1637                if not self._match(TokenType.GT):
1638                    self.raise_error("Expecting >")
1639            else:
1640                value = self._parse_schema(exp.Var(this="TABLE"))
1641        else:
1642            value = self._parse_types()
1643
1644        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1645
1646    def _parse_temporary(self, global_=False) -> exp.Expression:
1647        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1648        return self.expression(exp.TemporaryProperty, global_=global_)
1649
1650    def _parse_describe(self) -> exp.Expression:
1651        kind = self._match_set(self.CREATABLES) and self._prev.text
1652        this = self._parse_table()
1653
1654        return self.expression(exp.Describe, this=this, kind=kind)
1655
1656    def _parse_insert(self) -> exp.Expression:
1657        overwrite = self._match(TokenType.OVERWRITE)
1658        local = self._match(TokenType.LOCAL)
1659        alternative = None
1660
1661        if self._match_text_seq("DIRECTORY"):
1662            this: t.Optional[exp.Expression] = self.expression(
1663                exp.Directory,
1664                this=self._parse_var_or_string(),
1665                local=local,
1666                row_format=self._parse_row_format(match_row=True),
1667            )
1668        else:
1669            if self._match(TokenType.OR):
1670                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1671
1672            self._match(TokenType.INTO)
1673            self._match(TokenType.TABLE)
1674            this = self._parse_table(schema=True)
1675
1676        return self.expression(
1677            exp.Insert,
1678            this=this,
1679            exists=self._parse_exists(),
1680            partition=self._parse_partition(),
1681            expression=self._parse_ddl_select(),
1682            conflict=self._parse_on_conflict(),
1683            returning=self._parse_returning(),
1684            overwrite=overwrite,
1685            alternative=alternative,
1686        )
1687
1688    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
1689        conflict = self._match_text_seq("ON", "CONFLICT")
1690        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1691
1692        if not (conflict or duplicate):
1693            return None
1694
1695        nothing = None
1696        expressions = None
1697        key = None
1698        constraint = None
1699
1700        if conflict:
1701            if self._match_text_seq("ON", "CONSTRAINT"):
1702                constraint = self._parse_id_var()
1703            else:
1704                key = self._parse_csv(self._parse_value)
1705
1706        self._match_text_seq("DO")
1707        if self._match_text_seq("NOTHING"):
1708            nothing = True
1709        else:
1710            self._match(TokenType.UPDATE)
1711            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1712
1713        return self.expression(
1714            exp.OnConflict,
1715            duplicate=duplicate,
1716            expressions=expressions,
1717            nothing=nothing,
1718            key=key,
1719            constraint=constraint,
1720        )
1721
1722    def _parse_returning(self) -> t.Optional[exp.Expression]:
1723        if not self._match(TokenType.RETURNING):
1724            return None
1725
1726        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1727
1728    def _parse_row(self) -> t.Optional[exp.Expression]:
1729        if not self._match(TokenType.FORMAT):
1730            return None
1731        return self._parse_row_format()
1732
1733    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1734        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1735            return None
1736
1737        if self._match_text_seq("SERDE"):
1738            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1739
1740        self._match_text_seq("DELIMITED")
1741
1742        kwargs = {}
1743
1744        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1745            kwargs["fields"] = self._parse_string()
1746            if self._match_text_seq("ESCAPED", "BY"):
1747                kwargs["escaped"] = self._parse_string()
1748        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1749            kwargs["collection_items"] = self._parse_string()
1750        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1751            kwargs["map_keys"] = self._parse_string()
1752        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1753            kwargs["lines"] = self._parse_string()
1754        if self._match_text_seq("NULL", "DEFINED", "AS"):
1755            kwargs["null"] = self._parse_string()
1756
1757        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1758
1759    def _parse_load_data(self) -> exp.Expression:
1760        local = self._match(TokenType.LOCAL)
1761        self._match_text_seq("INPATH")
1762        inpath = self._parse_string()
1763        overwrite = self._match(TokenType.OVERWRITE)
1764        self._match_pair(TokenType.INTO, TokenType.TABLE)
1765
1766        return self.expression(
1767            exp.LoadData,
1768            this=self._parse_table(schema=True),
1769            local=local,
1770            overwrite=overwrite,
1771            inpath=inpath,
1772            partition=self._parse_partition(),
1773            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1774            serde=self._match_text_seq("SERDE") and self._parse_string(),
1775        )
1776
1777    def _parse_delete(self) -> exp.Expression:
1778        self._match(TokenType.FROM)
1779
1780        return self.expression(
1781            exp.Delete,
1782            this=self._parse_table(),
1783            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1784            where=self._parse_where(),
1785            returning=self._parse_returning(),
1786        )
1787
1788    def _parse_update(self) -> exp.Expression:
1789        return self.expression(
1790            exp.Update,
1791            **{  # type: ignore
1792                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1793                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1794                "from": self._parse_from(modifiers=True),
1795                "where": self._parse_where(),
1796                "returning": self._parse_returning(),
1797            },
1798        )
1799
1800    def _parse_uncache(self) -> exp.Expression:
1801        if not self._match(TokenType.TABLE):
1802            self.raise_error("Expecting TABLE after UNCACHE")
1803
1804        return self.expression(
1805            exp.Uncache,
1806            exists=self._parse_exists(),
1807            this=self._parse_table(schema=True),
1808        )
1809
1810    def _parse_cache(self) -> exp.Expression:
1811        lazy = self._match(TokenType.LAZY)
1812        self._match(TokenType.TABLE)
1813        table = self._parse_table(schema=True)
1814        options = []
1815
1816        if self._match(TokenType.OPTIONS):
1817            self._match_l_paren()
1818            k = self._parse_string()
1819            self._match(TokenType.EQ)
1820            v = self._parse_string()
1821            options = [k, v]
1822            self._match_r_paren()
1823
1824        self._match(TokenType.ALIAS)
1825        return self.expression(
1826            exp.Cache,
1827            this=table,
1828            lazy=lazy,
1829            options=options,
1830            expression=self._parse_select(nested=True),
1831        )
1832
1833    def _parse_partition(self) -> t.Optional[exp.Expression]:
1834        if not self._match(TokenType.PARTITION):
1835            return None
1836
1837        return self.expression(
1838            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1839        )
1840
1841    def _parse_value(self) -> exp.Expression:
1842        if self._match(TokenType.L_PAREN):
1843            expressions = self._parse_csv(self._parse_conjunction)
1844            self._match_r_paren()
1845            return self.expression(exp.Tuple, expressions=expressions)
1846
1847        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1848        # Source: https://prestodb.io/docs/current/sql/values.html
1849        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1850
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH-prefixed statement, plain SELECT,
        parenthesized subquery (when `nested`/`table`), or VALUES.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (with any trailing set operations attached),
            or None if no select-like construct is found.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the CTE to the statement when it supports one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT [ON (<exprs>)]
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1930
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause (list of CTEs), or return None when absent.

        Args:
            skip_with_token: when True, assume the WITH token was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue while CTEs are separated by a comma or a WITH token.
            # NOTE(review): the else-branch also consumes an extra WITH after a
            # separator (e.g. ", WITH") — presumably tolerating redundant WITH
            # keywords between CTEs; confirm against the dialect tests.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1950
1951    def _parse_cte(self) -> exp.Expression:
1952        alias = self._parse_table_alias()
1953        if not alias or not alias.this:
1954            self.raise_error("Expected CTE to have alias")
1955
1956        self._match(TokenType.ALIAS)
1957
1958        return self.expression(
1959            exp.CTE,
1960            this=self._parse_wrapped(self._parse_statement),
1961            alias=alias,
1962        )
1963
1964    def _parse_table_alias(
1965        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1966    ) -> t.Optional[exp.Expression]:
1967        any_token = self._match(TokenType.ALIAS)
1968        alias = (
1969            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1970            or self._parse_string_as_identifier()
1971        )
1972
1973        index = self._index
1974        if self._match(TokenType.L_PAREN):
1975            columns = self._parse_csv(self._parse_function_parameter)
1976            self._match_r_paren() if columns else self._retreat(index)
1977        else:
1978            columns = None
1979
1980        if not alias and not columns:
1981            return None
1982
1983        return self.expression(exp.TableAlias, this=alias, columns=columns)
1984
1985    def _parse_subquery(
1986        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1987    ) -> exp.Expression:
1988        return self.expression(
1989            exp.Subquery,
1990            this=this,
1991            pivots=self._parse_pivots(),
1992            alias=self._parse_table_alias() if parse_alias else None,
1993        )
1994
1995    def _parse_query_modifiers(
1996        self, this: t.Optional[exp.Expression]
1997    ) -> t.Optional[exp.Expression]:
1998        if isinstance(this, self.MODIFIABLES):
1999            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2000                expression = parser(self)
2001
2002                if expression:
2003                    this.set(key, expression)
2004        return this
2005
2006    def _parse_hint(self) -> t.Optional[exp.Expression]:
2007        if self._match(TokenType.HINT):
2008            hints = self._parse_csv(self._parse_function)
2009            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2010                self.raise_error("Expected */ after HINT")
2011            return self.expression(exp.Hint, expressions=hints)
2012
2013        return None
2014
2015    def _parse_into(self) -> t.Optional[exp.Expression]:
2016        if not self._match(TokenType.INTO):
2017            return None
2018
2019        temp = self._match(TokenType.TEMPORARY)
2020        unlogged = self._match(TokenType.UNLOGGED)
2021        self._match(TokenType.TABLE)
2022
2023        return self.expression(
2024            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2025        )
2026
2027    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2028        if not self._match(TokenType.FROM):
2029            return None
2030
2031        comments = self._prev_comments
2032        this = self._parse_table()
2033
2034        return self.expression(
2035            exp.From,
2036            comments=comments,
2037            this=self._parse_query_modifiers(this) if modifiers else this,
2038        )
2039
    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parse a MATCH_RECOGNIZE(...) clause, or return None when absent.

        The PATTERN body is not tokenized into an AST; its raw SQL text is
        captured verbatim into an exp.Var by scanning balanced parentheses.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        # The ROWS PER MATCH variants are stored as literal text in a Var.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        # AFTER MATCH SKIP is likewise captured as literal text.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan forward over balanced parens; `end` tracks the token before
            # the closing paren so _find_sql can slice the raw pattern text.
            # NOTE(review): `end` is only bound inside the loop — if raise_error
            # does not raise (lenient error level) and the loop body never runs,
            # the _find_sql call below would hit an unbound local; confirm.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        # DEFINE var AS <condition>, ...
        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
2127
2128    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2129        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2130        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2131
2132        if outer_apply or cross_apply:
2133            this = self._parse_select(table=True)
2134            view = None
2135            outer = not cross_apply
2136        elif self._match(TokenType.LATERAL):
2137            this = self._parse_select(table=True)
2138            view = self._match(TokenType.VIEW)
2139            outer = self._match(TokenType.OUTER)
2140        else:
2141            return None
2142
2143        if not this:
2144            this = self._parse_function() or self._parse_id_var(any_token=False)
2145            while self._match(TokenType.DOT):
2146                this = exp.Dot(
2147                    this=this,
2148                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2149                )
2150
2151        table_alias: t.Optional[exp.Expression]
2152
2153        if view:
2154            table = self._parse_id_var(any_token=False)
2155            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2156            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2157        else:
2158            table_alias = self._parse_table_alias()
2159
2160        expression = self.expression(
2161            exp.Lateral,
2162            this=this,
2163            view=view,
2164            outer=outer,
2165            alias=table_alias,
2166        )
2167
2168        return expression
2169
2170    def _parse_join_side_and_kind(
2171        self,
2172    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2173        return (
2174            self._match(TokenType.NATURAL) and self._prev,
2175            self._match_set(self.JOIN_SIDES) and self._prev,
2176            self._match_set(self.JOIN_KINDS) and self._prev,
2177        )
2178
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause (including comma joins and CROSS/OUTER APPLY), or None.

        Args:
            skip_join_token: when True, the JOIN keyword is not required
                (e.g. when the caller already consumed it).
        """
        # A bare comma is an implicit cross join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        # Without a JOIN keyword the consumed modifiers were not a join after
        # all — rewind and discard them.
        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        # OUTER APPLY is represented as a LEFT join via a synthetic token.
        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2222
2223    def _parse_index(self) -> exp.Expression:
2224        index = self._parse_id_var()
2225        self._match(TokenType.ON)
2226        self._match(TokenType.TABLE)  # hive
2227
2228        return self.expression(
2229            exp.Index,
2230            this=index,
2231            table=self.expression(exp.Table, this=self._parse_id_var()),
2232            columns=self._parse_expression(),
2233        )
2234
2235    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2236        unique = self._match(TokenType.UNIQUE)
2237        primary = self._match_text_seq("PRIMARY")
2238        amp = self._match_text_seq("AMP")
2239        if not self._match(TokenType.INDEX):
2240            return None
2241        index = self._parse_id_var()
2242        columns = None
2243        if self._match(TokenType.L_PAREN, advance=False):
2244            columns = self._parse_wrapped_csv(self._parse_column)
2245        return self.expression(
2246            exp.Index,
2247            this=index,
2248            columns=columns,
2249            unique=unique,
2250            primary=primary,
2251            amp=amp,
2252        )
2253
2254    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2255        return (
2256            (not schema and self._parse_function())
2257            or self._parse_id_var(any_token=False)
2258            or self._parse_string_as_identifier()
2259            or self._parse_placeholder()
2260        )
2261
2262    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2263        catalog = None
2264        db = None
2265        table = self._parse_table_part(schema=schema)
2266
2267        while self._match(TokenType.DOT):
2268            if catalog:
2269                # This allows nesting the table in arbitrarily many dot expressions if needed
2270                table = self.expression(
2271                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2272                )
2273            else:
2274                catalog = db
2275                db = table
2276                table = self._parse_table_part(schema=schema)
2277
2278        if not table:
2279            self.raise_error(f"Expected table name but got {self._curr}")
2280
2281        return self.expression(
2282            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2283        )
2284
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a table name.

        Args:
            schema: parse the table as a schema target (column definitions allowed).
            alias_tokens: token set permitted as alias names (defaults to
                TABLE_ALIAS_TOKENS).
        """
        # Each specialized form is tried in order; the first match wins.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: some dialects put TABLESAMPLE before the alias,
        # others after — hence the two possible parse positions below.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (<hints>) table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # The sample node wraps the table it samples from.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2336
2337    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2338        if not self._match(TokenType.UNNEST):
2339            return None
2340
2341        expressions = self._parse_wrapped_csv(self._parse_type)
2342        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2343        alias = self._parse_table_alias()
2344
2345        if alias and self.unnest_column_only:
2346            if alias.args.get("columns"):
2347                self.raise_error("Unexpected extra column alias in unnest.")
2348            alias.set("columns", [alias.this])
2349            alias.set("this", None)
2350
2351        offset = None
2352        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2353            self._match(TokenType.ALIAS)
2354            offset = self._parse_id_var() or exp.Identifier(this="offset")
2355
2356        return self.expression(
2357            exp.Unnest,
2358            expressions=expressions,
2359            ordinality=ordinality,
2360            alias=alias,
2361            offset=offset,
2362        )
2363
2364    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2365        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2366        if not is_derived and not self._match(TokenType.VALUES):
2367            return None
2368
2369        expressions = self._parse_csv(self._parse_value)
2370
2371        if is_derived:
2372            self._match_r_paren()
2373
2374        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2375
2376    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2377        if not self._match(TokenType.TABLE_SAMPLE) and not (
2378            as_modifier and self._match_text_seq("USING", "SAMPLE")
2379        ):
2380            return None
2381
2382        bucket_numerator = None
2383        bucket_denominator = None
2384        bucket_field = None
2385        percent = None
2386        rows = None
2387        size = None
2388        seed = None
2389
2390        kind = (
2391            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2392        )
2393        method = self._parse_var(tokens=(TokenType.ROW,))
2394
2395        self._match(TokenType.L_PAREN)
2396
2397        num = self._parse_number()
2398
2399        if self._match(TokenType.BUCKET):
2400            bucket_numerator = self._parse_number()
2401            self._match(TokenType.OUT_OF)
2402            bucket_denominator = bucket_denominator = self._parse_number()
2403            self._match(TokenType.ON)
2404            bucket_field = self._parse_field()
2405        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2406            percent = num
2407        elif self._match(TokenType.ROWS):
2408            rows = num
2409        else:
2410            size = num
2411
2412        self._match(TokenType.R_PAREN)
2413
2414        if self._match(TokenType.L_PAREN):
2415            method = self._parse_var()
2416            seed = self._match(TokenType.COMMA) and self._parse_number()
2417            self._match_r_paren()
2418        elif self._match_texts(("SEED", "REPEATABLE")):
2419            seed = self._parse_wrapped(self._parse_number)
2420
2421        return self.expression(
2422            exp.TableSample,
2423            method=method,
2424            bucket_numerator=bucket_numerator,
2425            bucket_denominator=bucket_denominator,
2426            bucket_field=bucket_field,
2427            percent=percent,
2428            rows=rows,
2429            size=size,
2430            seed=seed,
2431            kind=kind,
2432        )
2433
2434    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2435        return list(iter(self._parse_pivot, None))
2436
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, backtracking (returning None) if absent."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # Without a parenthesized body this isn't a pivot clause; rewind.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists columns; PIVOT lists (optionally aliased) aggregations.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias if another PIVOT/UNPIVOT doesn't follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the pivot's output column names: one per combination of
            # IN-list value and aggregation name, joined per dialect settings.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2496
2497    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2498        return [agg.alias for agg in aggregations]
2499
2500    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2501        if not skip_where_token and not self._match(TokenType.WHERE):
2502            return None
2503
2504        return self.expression(
2505            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2506        )
2507
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each kind of grouping element; becomes exp.Group's args.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH may prefix ROLLUP/CUBE/TOTALS (e.g. GROUP BY x WITH ROLLUP);
            # in that form the element carries no column list of its own.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while grouping modifiers are still being consumed.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2544
2545    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2546        if not self._match(TokenType.GROUPING_SETS):
2547            return None
2548
2549        return self._parse_wrapped_csv(self._parse_grouping_set)
2550
2551    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2552        if self._match(TokenType.L_PAREN):
2553            grouping_set = self._parse_csv(self._parse_column)
2554            self._match_r_paren()
2555            return self.expression(exp.Tuple, expressions=grouping_set)
2556
2557        return self._parse_column()
2558
2559    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2560        if not skip_having_token and not self._match(TokenType.HAVING):
2561            return None
2562        return self.expression(exp.Having, this=self._parse_conjunction())
2563
2564    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2565        if not self._match(TokenType.QUALIFY):
2566            return None
2567        return self.expression(exp.Qualify, this=self._parse_conjunction())
2568
2569    def _parse_order(
2570        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2571    ) -> t.Optional[exp.Expression]:
2572        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2573            return this
2574
2575        return self.expression(
2576            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2577        )
2578
2579    def _parse_sort(
2580        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2581    ) -> t.Optional[exp.Expression]:
2582        if not self._match(token_type):
2583            return None
2584        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2585
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item, filling in a default null ordering when the
        query doesn't specify NULLS FIRST/LAST explicitly."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # No explicit NULLS FIRST/LAST: derive the effective ordering from the
        # dialect's null_ordering setting so transpilation keeps semantics.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2607
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, or a FETCH clause.

        Returns `this` unchanged if neither clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # Some dialects wrap the count in parens; remember so we can close it.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST | NEXT} [count] [PERCENT] {ROW | ROWS} [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2646
2647    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2648        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2649            return this
2650
2651        count = self._parse_number()
2652        self._match_set((TokenType.ROW, TokenType.ROWS))
2653        return self.expression(exp.Offset, this=this, expression=count)
2654
2655    def _parse_locks(self) -> t.List[exp.Expression]:
2656        # Lists are invariant, so we need to use a type hint here
2657        locks: t.List[exp.Expression] = []
2658
2659        while True:
2660            if self._match_text_seq("FOR", "UPDATE"):
2661                update = True
2662            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2663                "LOCK", "IN", "SHARE", "MODE"
2664            ):
2665                update = False
2666            else:
2667                break
2668
2669            expressions = None
2670            if self._match_text_seq("OF"):
2671                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2672
2673            wait: t.Optional[bool | exp.Expression] = None
2674            if self._match_text_seq("NOWAIT"):
2675                wait = True
2676            elif self._match_text_seq("WAIT"):
2677                wait = self._parse_primary()
2678            elif self._match_text_seq("SKIP", "LOCKED"):
2679                wait = False
2680
2681            locks.append(
2682                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2683            )
2684
2685        return locks
2686
2687    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2688        if not self._match_set(self.SET_OPERATIONS):
2689            return this
2690
2691        token_type = self._prev.token_type
2692
2693        if token_type == TokenType.UNION:
2694            expression = exp.Union
2695        elif token_type == TokenType.EXCEPT:
2696            expression = exp.Except
2697        else:
2698            expression = exp.Intersect
2699
2700        return self.expression(
2701            expression,
2702            this=this,
2703            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2704            expression=self._parse_set_operations(self._parse_select(nested=True)),
2705        )
2706
2707    def _parse_expression(self) -> t.Optional[exp.Expression]:
2708        return self._parse_alias(self._parse_conjunction())
2709
2710    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2711        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2712
2713    def _parse_equality(self) -> t.Optional[exp.Expression]:
2714        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2715
2716    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2717        return self._parse_tokens(self._parse_range, self.COMPARISON)
2718
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (via RANGE_PARSERS), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # A leading NOT (e.g. `x NOT BETWEEN ...`) negates the whole predicate.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2745
2746    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2747        index = self._index - 1
2748        negate = self._match(TokenType.NOT)
2749        if self._match(TokenType.DISTINCT_FROM):
2750            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2751            return self.expression(klass, this=this, expression=self._parse_expression())
2752
2753        expression = self._parse_null() or self._parse_boolean()
2754        if not expression:
2755            self._retreat(index)
2756            return None
2757
2758        this = self.expression(exp.Is, this=this, expression=expression)
2759        return self.expression(exp.Not, this=this) if negate else this
2760
2761    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2762        unnest = self._parse_unnest()
2763        if unnest:
2764            this = self.expression(exp.In, this=this, unnest=unnest)
2765        elif self._match(TokenType.L_PAREN):
2766            expressions = self._parse_csv(self._parse_select_or_expression)
2767
2768            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2769                this = self.expression(exp.In, this=this, query=expressions[0])
2770            else:
2771                this = self.expression(exp.In, this=this, expressions=expressions)
2772
2773            self._match_r_paren(this)
2774        else:
2775            this = self.expression(exp.In, this=this, field=self._parse_field())
2776
2777        return this
2778
2779    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2780        low = self._parse_bitwise()
2781        self._match(TokenType.AND)
2782        high = self._parse_bitwise()
2783        return self.expression(exp.Between, this=this, low=low, high=high)
2784
2785    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2786        if not self._match(TokenType.ESCAPE):
2787            return this
2788        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2789
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, canonicalizing literals to `'<n>' <unit>` form."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split e.g. '5 day' into value '5' and unit `day`.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2810
2811    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2812        this = self._parse_term()
2813
2814        while True:
2815            if self._match_set(self.BITWISE):
2816                this = self.expression(
2817                    self.BITWISE[self._prev.token_type],
2818                    this=this,
2819                    expression=self._parse_term(),
2820                )
2821            elif self._match_pair(TokenType.LT, TokenType.LT):
2822                this = self.expression(
2823                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2824                )
2825            elif self._match_pair(TokenType.GT, TokenType.GT):
2826                this = self.expression(
2827                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2828                )
2829            else:
2830                break
2831
2832        return this
2833
2834    def _parse_term(self) -> t.Optional[exp.Expression]:
2835        return self._parse_tokens(self._parse_factor, self.TERM)
2836
2837    def _parse_factor(self) -> t.Optional[exp.Expression]:
2838        return self._parse_tokens(self._parse_unary, self.FACTOR)
2839
2840    def _parse_unary(self) -> t.Optional[exp.Expression]:
2841        if self._match_set(self.UNARY_PARSERS):
2842            return self.UNARY_PARSERS[self._prev.token_type](self)
2843        return self._parse_at_time_zone(self._parse_type())
2844
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast form, a bare type, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Forms like DATE '2020-01-01' become a Cast, or a dialect-specific
                # node when a TYPE_LITERAL_PARSERS entry exists for the type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name not followed by a literal — rewind and
                # reparse it as a column reference instead.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2866
2867    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2868        this = self._parse_type()
2869        if not this:
2870            return None
2871
2872        return self.expression(
2873            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2874        )
2875
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type (possibly nested/parameterized) into an exp.DataType.

        When `check_func` is True, a parenthesized form like TYPE(...) is only
        accepted as a type if a string literal follows; otherwise we backtrack
        so it can be reparsed as a function call.
        """
        index = self._index

        # Optional "SYSUDTLIB." prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # `TYPE()` with no args (or an unbalanced paren) is not a type — rewind.
            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Could still be a function call like DATE(...); resolved below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `T[]`, `T[][]`, ... denote ARRAY types, one nesting level per pair.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this wasn't a type after all; rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Fold WITH/WITHOUT TIME ZONE qualifiers into concrete timestamp types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone qualifier rules out the function-call reading.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek ahead: without a trailing string literal, treat TYPE(...) as a
            # function call and abandon the type parse entirely.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2988
2989    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2990        this = self._parse_type() or self._parse_id_var()
2991        self._match(TokenType.COLON)
2992        return self._parse_column_def(this)
2993
2994    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2995        if not self._match(TokenType.AT_TIME_ZONE):
2996            return this
2997        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2998
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including bracket access
        and column operators such as `::` casts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast: the right operand must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Other registered operators take the next token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: earlier name parts become table/db/catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3051
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers like `.5` arrive as a DOT token followed by a NUMBER token.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3095
3096    def _parse_field(
3097        self,
3098        any_token: bool = False,
3099        tokens: t.Optional[t.Collection[TokenType]] = None,
3100    ) -> t.Optional[exp.Expression]:
3101        return (
3102            self._parse_primary()
3103            or self._parse_function()
3104            or self._parse_id_var(any_token=any_token, tokens=tokens)
3105        )
3106
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, including parenless functions, subquery
        predicates and a trailing window specification.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous call instead of
                using a registered parser/builder.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No opening paren ahead: only registered parenless functions qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...): wrap the subquery in the predicate node.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                # Known functions get a typed node and argument validation.
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3158
3159    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3160        return self._parse_column_def(self._parse_id_var())
3161
3162    def _parse_user_defined_function(
3163        self, kind: t.Optional[TokenType] = None
3164    ) -> t.Optional[exp.Expression]:
3165        this = self._parse_id_var()
3166
3167        while self._match(TokenType.DOT):
3168            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3169
3170        if not self._match(TokenType.L_PAREN):
3171            return this
3172
3173        expressions = self._parse_csv(self._parse_function_parameter)
3174        self._match_r_paren()
3175        return self.expression(
3176            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3177        )
3178
3179    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3180        literal = self._parse_primary()
3181        if literal:
3182            return self.expression(exp.Introducer, this=token.text, expression=literal)
3183
3184        return self.expression(exp.Identifier, this=token.text)
3185
3186    def _parse_national(self, token: Token) -> exp.Expression:
3187        return self.expression(exp.National, this=exp.Literal.string(token.text))
3188
3189    def _parse_session_parameter(self) -> exp.Expression:
3190        kind = None
3191        this = self._parse_id_var() or self._parse_primary()
3192
3193        if this and self._match(TokenType.DOT):
3194            kind = this.name
3195            this = self._parse_var() or self._parse_primary()
3196
3197        return self.expression(exp.SessionParameter, this=this, kind=kind)
3198
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> expr`), falling back to a regular
        expression (optionally DISTINCT / ordered / limited) when no lambda
        operator follows the candidate parameter list."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; back out.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator: rewind and reparse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # NOTE(review): a bare column on the left of `=` is demoted
                    # to a Var — presumably a name = value style argument;
                    # confirm against callers.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3230
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column definitions / constraints) for `this`.

        A pure lookahead for a nested SELECT runs first; the `finally` always
        restores the token index, so the check consumes nothing.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                # Looks like a subquery rather than a schema; leave the tokens
                # untouched for the caller.
                return this
        except Exception:
            # The lookahead can fail arbitrarily on non-SELECT input; any
            # error simply means "not a nested select here".
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3251
3252    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3253        # column defs are not really columns, they're identifiers
3254        if isinstance(this, exp.Column):
3255            this = this.this
3256        kind = self._parse_types()
3257
3258        if self._match_text_seq("FOR", "ORDINALITY"):
3259            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3260
3261        constraints = []
3262        while True:
3263            constraint = self._parse_column_constraint()
3264            if not constraint:
3265                break
3266            constraints.append(constraint)
3267
3268        if not kind and not constraints:
3269            return this
3270
3271        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3272
3273    def _parse_auto_increment(self) -> exp.Expression:
3274        start = None
3275        increment = None
3276
3277        if self._match(TokenType.L_PAREN, advance=False):
3278            args = self._parse_wrapped_csv(self._parse_bitwise)
3279            start = seq_get(args, 0)
3280            increment = seq_get(args, 1)
3281        elif self._match_text_seq("START"):
3282            start = self._parse_bitwise()
3283            self._match_text_seq("INCREMENT")
3284            increment = self._parse_bitwise()
3285
3286        if start and increment:
3287            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3288
3289        return exp.AutoIncrementColumnConstraint()
3290
3291    def _parse_compress(self) -> exp.Expression:
3292        if self._match(TokenType.L_PAREN, advance=False):
3293            return self.expression(
3294                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3295            )
3296
3297        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3298
3299    def _parse_generated_as_identity(self) -> exp.Expression:
3300        if self._match(TokenType.BY_DEFAULT):
3301            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
3302            this = self.expression(
3303                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
3304            )
3305        else:
3306            self._match_text_seq("ALWAYS")
3307            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)
3308
3309        self._match_text_seq("AS", "IDENTITY")
3310        if self._match(TokenType.L_PAREN):
3311            if self._match_text_seq("START", "WITH"):
3312                this.set("start", self._parse_bitwise())
3313            if self._match_text_seq("INCREMENT", "BY"):
3314                this.set("increment", self._parse_bitwise())
3315            if self._match_text_seq("MINVALUE"):
3316                this.set("minvalue", self._parse_bitwise())
3317            if self._match_text_seq("MAXVALUE"):
3318                this.set("maxvalue", self._parse_bitwise())
3319
3320            if self._match_text_seq("CYCLE"):
3321                this.set("cycle", True)
3322            elif self._match_text_seq("NO", "CYCLE"):
3323                this.set("cycle", False)
3324
3325            self._match_r_paren()
3326
3327        return this
3328
3329    def _parse_inline(self) -> t.Optional[exp.Expression]:
3330        self._match_text_seq("LENGTH")
3331        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3332
3333    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3334        if self._match_text_seq("NULL"):
3335            return self.expression(exp.NotNullColumnConstraint)
3336        if self._match_text_seq("CASESPECIFIC"):
3337            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3338        return None
3339
3340    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3341        if self._match(TokenType.CONSTRAINT):
3342            this = self._parse_id_var()
3343        else:
3344            this = None
3345
3346        if self._match_texts(self.CONSTRAINT_PARSERS):
3347            return self.expression(
3348                exp.ColumnConstraint,
3349                this=this,
3350                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3351            )
3352
3353        return this
3354
3355    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3356        if not self._match(TokenType.CONSTRAINT):
3357            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3358
3359        this = self._parse_id_var()
3360        expressions = []
3361
3362        while True:
3363            constraint = self._parse_unnamed_constraint() or self._parse_function()
3364            if not constraint:
3365                break
3366            expressions.append(constraint)
3367
3368        return self.expression(exp.Constraint, this=this, expressions=expressions)
3369
3370    def _parse_unnamed_constraint(
3371        self, constraints: t.Optional[t.Collection[str]] = None
3372    ) -> t.Optional[exp.Expression]:
3373        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3374            return None
3375
3376        constraint = self._prev.text.upper()
3377        if constraint not in self.CONSTRAINT_PARSERS:
3378            self.raise_error(f"No parser found for schema constraint {constraint}.")
3379
3380        return self.CONSTRAINT_PARSERS[constraint](self)
3381
3382    def _parse_unique(self) -> exp.Expression:
3383        if not self._match(TokenType.L_PAREN, advance=False):
3384            return self.expression(exp.UniqueColumnConstraint)
3385        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3386
3387    def _parse_key_constraint_options(self) -> t.List[str]:
3388        options = []
3389        while True:
3390            if not self._curr:
3391                break
3392
3393            if self._match(TokenType.ON):
3394                action = None
3395                on = self._advance_any() and self._prev.text
3396
3397                if self._match(TokenType.NO_ACTION):
3398                    action = "NO ACTION"
3399                elif self._match(TokenType.CASCADE):
3400                    action = "CASCADE"
3401                elif self._match_pair(TokenType.SET, TokenType.NULL):
3402                    action = "SET NULL"
3403                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
3404                    action = "SET DEFAULT"
3405                else:
3406                    self.raise_error("Invalid key constraint")
3407
3408                options.append(f"ON {on} {action}")
3409            elif self._match_text_seq("NOT", "ENFORCED"):
3410                options.append("NOT ENFORCED")
3411            elif self._match_text_seq("DEFERRABLE"):
3412                options.append("DEFERRABLE")
3413            elif self._match_text_seq("INITIALLY", "DEFERRED"):
3414                options.append("INITIALLY DEFERRED")
3415            elif self._match_text_seq("NORELY"):
3416                options.append("NORELY")
3417            elif self._match_text_seq("MATCH", "FULL"):
3418                options.append("MATCH FULL")
3419            else:
3420                break
3421
3422        return options
3423
3424    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3425        if match and not self._match(TokenType.REFERENCES):
3426            return None
3427
3428        expressions = None
3429        this = self._parse_id_var()
3430
3431        if self._match(TokenType.L_PAREN, advance=False):
3432            expressions = self._parse_wrapped_id_vars()
3433
3434        options = self._parse_key_constraint_options()
3435        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3436
3437    def _parse_foreign_key(self) -> exp.Expression:
3438        expressions = self._parse_wrapped_id_vars()
3439        reference = self._parse_references()
3440        options = {}
3441
3442        while self._match(TokenType.ON):
3443            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
3444                self.raise_error("Expected DELETE or UPDATE")
3445
3446            kind = self._prev.text.lower()
3447
3448            if self._match(TokenType.NO_ACTION):
3449                action = "NO ACTION"
3450            elif self._match(TokenType.SET):
3451                self._match_set((TokenType.NULL, TokenType.DEFAULT))
3452                action = "SET " + self._prev.text.upper()
3453            else:
3454                self._advance()
3455                action = self._prev.text.upper()
3456
3457            options[kind] = action
3458
3459        return self.expression(
3460            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
3461        )
3462
3463    def _parse_primary_key(self) -> exp.Expression:
3464        desc = (
3465            self._match_set((TokenType.ASC, TokenType.DESC))
3466            and self._prev.token_type == TokenType.DESC
3467        )
3468
3469        if not self._match(TokenType.L_PAREN, advance=False):
3470            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3471
3472        expressions = self._parse_wrapped_csv(self._parse_field)
3473        options = self._parse_key_constraint_options()
3474        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3475
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix — array/struct literals or a subscript —
        recursing at the end to support chained brackets like a[0][1]."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is a slice with no start bound: [:expr]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: shift the indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3504
3505    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3506        if self._match(TokenType.COLON):
3507            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3508        return this
3509
3510    def _parse_case(self) -> t.Optional[exp.Expression]:
3511        ifs = []
3512        default = None
3513
3514        expression = self._parse_conjunction()
3515
3516        while self._match(TokenType.WHEN):
3517            this = self._parse_conjunction()
3518            self._match(TokenType.THEN)
3519            then = self._parse_conjunction()
3520            ifs.append(self.expression(exp.If, this=this, true=then))
3521
3522        if self._match(TokenType.ELSE):
3523            default = self._parse_conjunction()
3524
3525        if not self._match(TokenType.END):
3526            self.raise_error("Expected END after CASE", self._prev)
3527
3528        return self._parse_window(
3529            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3530        )
3531
3532    def _parse_if(self) -> t.Optional[exp.Expression]:
3533        if self._match(TokenType.L_PAREN):
3534            args = self._parse_csv(self._parse_conjunction)
3535            this = exp.If.from_arg_list(args)
3536            self.validate_expression(this, args)
3537            self._match_r_paren()
3538        else:
3539            index = self._index - 1
3540            condition = self._parse_conjunction()
3541
3542            if not condition:
3543                self._retreat(index)
3544                return None
3545
3546            self._match(TokenType.THEN)
3547            true = self._parse_conjunction()
3548            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3549            self._match(TokenType.END)
3550            this = self.expression(exp.If, this=condition, true=true, false=false)
3551
3552        return self._parse_window(this)
3553
3554    def _parse_extract(self) -> exp.Expression:
3555        this = self._parse_function() or self._parse_var() or self._parse_type()
3556
3557        if self._match(TokenType.FROM):
3558            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3559
3560        if not self._match(TokenType.COMMA):
3561            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3562
3563        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3564
3565    def _parse_cast(self, strict: bool) -> exp.Expression:
3566        this = self._parse_conjunction()
3567
3568        if not self._match(TokenType.ALIAS):
3569            if self._match(TokenType.COMMA):
3570                return self.expression(
3571                    exp.CastToStrType, this=this, expression=self._parse_string()
3572                )
3573            else:
3574                self.raise_error("Expected AS after CAST")
3575
3576        to = self._parse_types()
3577
3578        if not to:
3579            self.raise_error("Expected TYPE after CAST")
3580        elif to.this == exp.DataType.Type.CHAR:
3581            if self._match(TokenType.CHARACTER_SET):
3582                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3583
3584        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3585
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into exp.GroupConcat,
        handling DISTINCT, an inline ORDER BY, and WITHIN GROUP (ORDER BY ...)."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            # Only the first argument is aggregated, so only it gets wrapped.
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            # No WITHIN GROUP: undo the R_PAREN consumption and finish here.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3614
3615    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3616        to: t.Optional[exp.Expression]
3617        this = self._parse_bitwise()
3618
3619        if self._match(TokenType.USING):
3620            to = self.expression(exp.CharacterSet, this=self._parse_var())
3621        elif self._match(TokenType.COMMA):
3622            to = self._parse_bitwise()
3623        else:
3624            to = None
3625
3626        # Swap the argument order if needed to produce the correct AST
3627        if self.CONVERT_TYPE_FIRST:
3628            this, to = to, this
3629
3630        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3631
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two (or fewer) arguments: the charset variant.
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired element, if any,
        # becomes the CASE default below.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL = NULL is not true in SQL, so match NULL explicitly.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match on equality, or on both sides
                # being NULL (DECODE considers two NULLs equal).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3678
3679    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3680        self._match_text_seq("KEY")
3681        key = self._parse_field()
3682        self._match(TokenType.COLON)
3683        self._match_text_seq("VALUE")
3684        value = self._parse_field()
3685        if not key and not value:
3686            return None
3687        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3688
3689    def _parse_json_object(self) -> exp.Expression:
3690        expressions = self._parse_csv(self._parse_json_key_value)
3691
3692        null_handling = None
3693        if self._match_text_seq("NULL", "ON", "NULL"):
3694            null_handling = "NULL ON NULL"
3695        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3696            null_handling = "ABSENT ON NULL"
3697
3698        unique_keys = None
3699        if self._match_text_seq("WITH", "UNIQUE"):
3700            unique_keys = True
3701        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3702            unique_keys = False
3703
3704        self._match_text_seq("KEYS")
3705
3706        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3707        format_json = self._match_text_seq("FORMAT", "JSON")
3708        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3709
3710        return self.expression(
3711            exp.JSONObject,
3712            expressions=expressions,
3713            null_handling=null_handling,
3714            unique_keys=unique_keys,
3715            return_type=return_type,
3716            format_json=format_json,
3717            encoding=encoding,
3718        )
3719
3720    def _parse_logarithm(self) -> exp.Expression:
3721        # Default argument order is base, expression
3722        args = self._parse_csv(self._parse_range)
3723
3724        if len(args) > 1:
3725            if not self.LOG_BASE_FIRST:
3726                args.reverse()
3727            return exp.Log.from_arg_list(args)
3728
3729        return self.expression(
3730            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3731        )
3732
3733    def _parse_match_against(self) -> exp.Expression:
3734        expressions = self._parse_csv(self._parse_column)
3735
3736        self._match_text_seq(")", "AGAINST", "(")
3737
3738        this = self._parse_string()
3739
3740        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3741            modifier = "IN NATURAL LANGUAGE MODE"
3742            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3743                modifier = f"{modifier} WITH QUERY EXPANSION"
3744        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3745            modifier = "IN BOOLEAN MODE"
3746        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3747            modifier = "WITH QUERY EXPANSION"
3748        else:
3749            modifier = None
3750
3751        return self.expression(
3752            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3753        )
3754
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(<json> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            # The WITH clause sits outside OPENJSON's closing paren, so match
            # that paren together with WITH before opening the column list.
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3775
3776    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3777        args = self._parse_csv(self._parse_bitwise)
3778
3779        if self._match(TokenType.IN):
3780            return self.expression(
3781                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3782            )
3783
3784        if haystack_first:
3785            haystack = seq_get(args, 0)
3786            needle = seq_get(args, 1)
3787        else:
3788            needle = seq_get(args, 0)
3789            haystack = seq_get(args, 1)
3790
3791        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3792
3793        self.validate_expression(this, args)
3794
3795        return this
3796
3797    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3798        args = self._parse_csv(self._parse_table)
3799        return exp.JoinHint(this=func_name.upper(), expressions=args)
3800
3801    def _parse_substring(self) -> exp.Expression:
3802        # Postgres supports the form: substring(string [from int] [for int])
3803        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3804
3805        args = self._parse_csv(self._parse_bitwise)
3806
3807        if self._match(TokenType.FROM):
3808            args.append(self._parse_bitwise())
3809            if self._match(TokenType.FOR):
3810                args.append(self._parse_bitwise())
3811
3812        this = exp.Substring.from_arg_list(args)
3813        self.validate_expression(this, args)
3814
3815        return this
3816
3817    def _parse_trim(self) -> exp.Expression:
3818        # https://www.w3resource.com/sql/character-functions/trim.php
3819        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3820
3821        position = None
3822        collation = None
3823
3824        if self._match_set(self.TRIM_TYPES):
3825            position = self._prev.text.upper()
3826
3827        expression = self._parse_bitwise()
3828        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3829            this = self._parse_bitwise()
3830        else:
3831            this = expression
3832            expression = None
3833
3834        if self._match(TokenType.COLLATE):
3835            collation = self._parse_bitwise()
3836
3837        return self.expression(
3838            exp.Trim,
3839            this=this,
3840            position=position,
3841            expression=expression,
3842            collation=collation,
3843        )
3844
3845    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3846        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3847
3848    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3849        return self._parse_window(self._parse_id_var(), alias=True)
3850
3851    def _parse_respect_or_ignore_nulls(
3852        self, this: t.Optional[exp.Expression]
3853    ) -> t.Optional[exp.Expression]:
3854        if self._match(TokenType.IGNORE_NULLS):
3855            return self.expression(exp.IgnoreNulls, this=this)
3856        if self._match(TokenType.RESPECT_NULLS):
3857            return self.expression(exp.RespectNulls, this=this)
3858        return this
3859
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the modifiers that may follow a function call — FILTER (...),
        WITHIN GROUP (...), IGNORE/RESPECT NULLS and the OVER (...) window
        specification — wrapping `this` accordingly. With `alias=True`, parse
        a named window definition (<name> AS (<spec>)) instead of OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window; return what we have.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form — references a named window definition.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FIRST / LAST modifier; a later LAST overrides an earlier FIRST.
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS|RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3942
3943    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3944        self._match(TokenType.BETWEEN)
3945
3946        return {
3947            "value": (
3948                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3949            )
3950            or self._parse_bitwise(),
3951            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3952        }
3953
3954    def _parse_alias(
3955        self, this: t.Optional[exp.Expression], explicit: bool = False
3956    ) -> t.Optional[exp.Expression]:
3957        any_token = self._match(TokenType.ALIAS)
3958
3959        if explicit and not any_token:
3960            return this
3961
3962        if self._match(TokenType.L_PAREN):
3963            aliases = self.expression(
3964                exp.Aliases,
3965                this=this,
3966                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3967            )
3968            self._match_r_paren(aliases)
3969            return aliases
3970
3971        alias = self._parse_id_var(any_token)
3972
3973        if alias:
3974            return self.expression(exp.Alias, this=this, alias=alias)
3975
3976        return this
3977
3978    def _parse_id_var(
3979        self,
3980        any_token: bool = True,
3981        tokens: t.Optional[t.Collection[TokenType]] = None,
3982        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3983    ) -> t.Optional[exp.Expression]:
3984        identifier = self._parse_identifier()
3985
3986        if identifier:
3987            return identifier
3988
3989        prefix = ""
3990
3991        if prefix_tokens:
3992            while self._match_set(prefix_tokens):
3993                prefix += self._prev.text
3994
3995        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3996            quoted = self._prev.token_type == TokenType.STRING
3997            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3998
3999        return None
4000
4001    def _parse_string(self) -> t.Optional[exp.Expression]:
4002        if self._match(TokenType.STRING):
4003            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4004        return self._parse_placeholder()
4005
4006    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4007        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4008
4009    def _parse_number(self) -> t.Optional[exp.Expression]:
4010        if self._match(TokenType.NUMBER):
4011            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4012        return self._parse_placeholder()
4013
4014    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4015        if self._match(TokenType.IDENTIFIER):
4016            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4017        return self._parse_placeholder()
4018
4019    def _parse_var(
4020        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4021    ) -> t.Optional[exp.Expression]:
4022        if (
4023            (any_token and self._advance_any())
4024            or self._match(TokenType.VAR)
4025            or (self._match_set(tokens) if tokens else False)
4026        ):
4027            return self.expression(exp.Var, this=self._prev.text)
4028        return self._parse_placeholder()
4029
4030    def _advance_any(self) -> t.Optional[Token]:
4031        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4032            self._advance()
4033            return self._prev
4034        return None
4035
    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a variable, falling back to a string literal."""
        return self._parse_var() or self._parse_string()
4038
4039    def _parse_null(self) -> t.Optional[exp.Expression]:
4040        if self._match(TokenType.NULL):
4041            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4042        return None
4043
4044    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4045        if self._match(TokenType.TRUE):
4046            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4047        if self._match(TokenType.FALSE):
4048            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4049        return None
4050
4051    def _parse_star(self) -> t.Optional[exp.Expression]:
4052        if self._match(TokenType.STAR):
4053            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4054        return None
4055
    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally wrapped in braces (``{x}``)."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        # A missing closing brace is tolerated silently.
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4061
4062    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4063        if self._match_set(self.PLACEHOLDER_PARSERS):
4064            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4065            if placeholder:
4066                return placeholder
4067            self._advance(-1)
4068        return None
4069
4070    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4071        if not self._match(TokenType.EXCEPT):
4072            return None
4073        if self._match(TokenType.L_PAREN, advance=False):
4074            return self._parse_wrapped_csv(self._parse_column)
4075        return self._parse_csv(self._parse_column)
4076
4077    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4078        if not self._match(TokenType.REPLACE):
4079            return None
4080        if self._match(TokenType.L_PAREN, advance=False):
4081            return self._parse_wrapped_csv(self._parse_expression)
4082        return self._parse_csv(self._parse_expression)
4083
4084    def _parse_csv(
4085        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4086    ) -> t.List[t.Optional[exp.Expression]]:
4087        parse_result = parse_method()
4088        items = [parse_result] if parse_result is not None else []
4089
4090        while self._match(sep):
4091            self._add_comments(parse_result)
4092            parse_result = parse_method()
4093            if parse_result is not None:
4094                items.append(parse_result)
4095
4096        return items
4097
4098    def _parse_tokens(
4099        self, parse_method: t.Callable, expressions: t.Dict
4100    ) -> t.Optional[exp.Expression]:
4101        this = parse_method()
4102
4103        while self._match_set(expressions):
4104            this = self.expression(
4105                expressions[self._prev.token_type],
4106                this=this,
4107                comments=self._prev_comments,
4108                expression=parse_method(),
4109            )
4110
4111        return this
4112
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4115
4116    def _parse_wrapped_csv(
4117        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4118    ) -> t.List[t.Optional[exp.Expression]]:
4119        return self._parse_wrapped(
4120            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4121        )
4122
4123    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4124        wrapped = self._match(TokenType.L_PAREN)
4125        if not wrapped and not optional:
4126            self.raise_error("Expecting (")
4127        parse_result = parse_method()
4128        if wrapped:
4129            self._match_r_paren()
4130        return parse_result
4131
    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, or an expression with set operations."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4134
    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )
4139
4140    def _parse_transaction(self) -> exp.Expression:
4141        this = None
4142        if self._match_texts(self.TRANSACTION_KIND):
4143            this = self._prev.text
4144
4145        self._match_texts({"TRANSACTION", "WORK"})
4146
4147        modes = []
4148        while True:
4149            mode = []
4150            while self._match(TokenType.VAR):
4151                mode.append(self._prev.text)
4152
4153            if mode:
4154                modes.append(" ".join(mode))
4155            if not self._match(TokenType.COMMA):
4156                break
4157
4158        return self.expression(exp.Transaction, this=this, modes=modes)
4159
4160    def _parse_commit_or_rollback(self) -> exp.Expression:
4161        chain = None
4162        savepoint = None
4163        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4164
4165        self._match_texts({"TRANSACTION", "WORK"})
4166
4167        if self._match_text_seq("TO"):
4168            self._match_text_seq("SAVEPOINT")
4169            savepoint = self._parse_id_var()
4170
4171        if self._match(TokenType.AND):
4172            chain = not self._match_text_seq("NO")
4173            self._match_text_seq("CHAIN")
4174
4175        if is_rollback:
4176            return self.expression(exp.Rollback, savepoint=savepoint)
4177        return self.expression(exp.Commit, chain=chain)
4178
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4199
4200    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4201        drop = self._match(TokenType.DROP) and self._parse_drop()
4202        if drop and not isinstance(drop, exp.Command):
4203            drop.set("kind", drop.args.get("kind", "COLUMN"))
4204        return drop
4205
    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse DROP [IF EXISTS] PARTITION (...) [, PARTITION (...)]."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
4211
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        The triggering token has already been consumed (it is `self._prev`).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named form: CONSTRAINT <name> [CHECK (...) [ENFORCED]] ...
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # The key keyword was either the trigger itself or follows the name.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4235
4236    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4237        index = self._index - 1
4238
4239        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4240            return self._parse_csv(self._parse_add_constraint)
4241
4242        self._retreat(index)
4243        return self._parse_csv(self._parse_add_column)
4244
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] <col> {DROP DEFAULT | SET DEFAULT <expr> |
        [SET DATA] TYPE <type> [COLLATE ...] [USING ...]}."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Optional "SET DATA" prefix before TYPE (Postgres style).
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4262
4263    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4264        index = self._index - 1
4265
4266        partition_exists = self._parse_exists()
4267        if self._match(TokenType.PARTITION, advance=False):
4268            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4269
4270        self._retreat(index)
4271        return self._parse_csv(self._parse_drop_column)
4272
4273    def _parse_alter_table_rename(self) -> exp.Expression:
4274        self._match_text_seq("TO")
4275        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4276
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Falls back to an opaque exp.Command for non-TABLE targets or whenever
        tokens remain unconsumed after the recognized actions.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        # The next token names the action (ADD/DROP/ALTER/RENAME/...).
        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if everything was consumed;
            # otherwise re-parse the whole statement as a raw command.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4301
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement.

        Shape: MERGE INTO <target> USING <source> ON <condition> followed by
        one or more WHEN [NOT] MATCHED [BY SOURCE|TARGET] [AND <cond>] THEN
        <insert | update | delete> clauses.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * — no explicit column/value lists.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * — update every column from the source.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4367
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via dialect-specific parsers, falling back
        to a generic exp.Show keyed on the next token's text."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())
4374
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one SET item: ``<name> = <value>`` or ``<name> TO <value>``.

        Args:
            kind: optional modifier (e.g. "GLOBAL"/"SESSION") the caller
                already consumed; "GLOBAL TRANSACTION"/"SESSION TRANSACTION"
                are delegated to `_parse_set_transaction`.

        Returns:
            An exp.SetItem, or None (position restored) when no assignment
            operator follows the name.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try something else.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4401
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it can't be a plain kwarg.
            **{"global": global_},  # type: ignore
        )
4413
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via dialect parsers, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4417
    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; fall back to a raw command on leftover tokens."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Tokens remain: rewind and treat the statement as opaque.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
4427
4428    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4429        for option in options:
4430            if self._match_text_seq(*option.split(" ")):
4431                return exp.Var(this=option)
4432        return None
4433
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL from `start`
        onward in an exp.Command (leading keyword in `this`, rest in
        `expression`)."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
4440
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming token texts against `trie` and return the
        matching parser from `parsers`.

        On a full keyword-sequence match the matched tokens stay consumed;
        otherwise the position is restored and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no registered keyword sequence starts this way.
                break
            if result == 2:
                # Full match: the words collected so far name a parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4463
4464    def _match(self, token_type, advance=True, expression=None):
4465        if not self._curr:
4466            return None
4467
4468        if self._curr.token_type == token_type:
4469            if advance:
4470                self._advance()
4471            self._add_comments(expression)
4472            return True
4473
4474        return None
4475
4476    def _match_set(self, types, advance=True):
4477        if not self._curr:
4478            return None
4479
4480        if self._curr.token_type in types:
4481            if advance:
4482                self._advance()
4483            return True
4484
4485        return None
4486
4487    def _match_pair(self, token_type_a, token_type_b, advance=True):
4488        if not self._curr or not self._next:
4489            return None
4490
4491        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4492            if advance:
4493                self._advance(2)
4494            return True
4495
4496        return None
4497
4498    def _match_l_paren(self, expression=None):
4499        if not self._match(TokenType.L_PAREN, expression=expression):
4500            self.raise_error("Expecting (")
4501
4502    def _match_r_paren(self, expression=None):
4503        if not self._match(TokenType.R_PAREN, expression=expression):
4504            self.raise_error("Expecting )")
4505
4506    def _match_texts(self, texts, advance=True):
4507        if self._curr and self._curr.text.upper() in texts:
4508            if advance:
4509                self._advance()
4510            return True
4511        return False
4512
    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts.

        Returns True when every text in `texts` matched consecutively; on any
        mismatch the position is fully restored and False is returned. When
        `advance` is False the position is also restored after a full match
        (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4526
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where dotted names should be treated as raw paths rather than
        table-qualified column references.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4541
    def _replace_lambda(self, node, lambda_variables):
        """Replace references to lambda parameters inside `node`.

        Columns whose first part names a variable in `lambda_variables` are
        replaced by the bare identifier (or a Dot chain when qualified), so
        they are not resolved as real table columns.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot containing this column.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot chain: replace the column itself
                    # (swapping the root node when the column IS the root).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VarMap from alternating key/value arguments, or a StarMap
    when the single argument is ``*``."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
def parse_like(args):
    """Build a LIKE expression from (pattern, value[, escape]) arguments,
    wrapping it in an Escape node when an escape character is given."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))
    return like
def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback that builds `expr_type` as a binary range
    predicate over a bitwise right-hand side, with optional ESCAPE."""

    def parse(self: Parser, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        right = self._parse_bitwise()
        return self._parse_escape(self.expression(expr_type, this=this, expression=right))

    return parse
class Parser:
  56class Parser(metaclass=_Parser):
  57    """
  58    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  59    a parsed syntax tree.
  60
  61    Args:
  62        error_level: the desired error level.
  63            Default: ErrorLevel.RAISE
  64        error_message_context: determines the amount of context to capture from a
  65            query string when displaying the error message (in number of characters).
  66            Default: 50.
  67        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  68            Default: 0
  69        alias_post_tablesample: If the table alias comes after tablesample.
  70            Default: False
  71        max_errors: Maximum number of error messages to include in a raised ParseError.
  72            This is only relevant if error_level is ErrorLevel.RAISE.
  73            Default: 3
  74        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  75            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  76            Default: "nulls_are_small"
  77    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
 182    SUBQUERY_PREDICATES = {
 183        TokenType.ANY: exp.Any,
 184        TokenType.ALL: exp.All,
 185        TokenType.EXISTS: exp.Exists,
 186        TokenType.SOME: exp.Any,
 187    }
 188
 189    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 190
 191    DB_CREATABLES = {
 192        TokenType.DATABASE,
 193        TokenType.SCHEMA,
 194        TokenType.TABLE,
 195        TokenType.VIEW,
 196    }
 197
 198    CREATABLES = {
 199        TokenType.COLUMN,
 200        TokenType.FUNCTION,
 201        TokenType.INDEX,
 202        TokenType.PROCEDURE,
 203        *DB_CREATABLES,
 204    }
 205
 206    ID_VAR_TOKENS = {
 207        TokenType.VAR,
 208        TokenType.ANTI,
 209        TokenType.APPLY,
 210        TokenType.AUTO_INCREMENT,
 211        TokenType.BEGIN,
 212        TokenType.BOTH,
 213        TokenType.BUCKET,
 214        TokenType.CACHE,
 215        TokenType.CASCADE,
 216        TokenType.COLLATE,
 217        TokenType.COMMAND,
 218        TokenType.COMMENT,
 219        TokenType.COMMIT,
 220        TokenType.COMPOUND,
 221        TokenType.CONSTRAINT,
 222        TokenType.DEFAULT,
 223        TokenType.DELETE,
 224        TokenType.DESCRIBE,
 225        TokenType.DIV,
 226        TokenType.END,
 227        TokenType.EXECUTE,
 228        TokenType.ESCAPE,
 229        TokenType.FALSE,
 230        TokenType.FIRST,
 231        TokenType.FILTER,
 232        TokenType.FOLLOWING,
 233        TokenType.FORMAT,
 234        TokenType.FULL,
 235        TokenType.IF,
 236        TokenType.IS,
 237        TokenType.ISNULL,
 238        TokenType.INTERVAL,
 239        TokenType.KEEP,
 240        TokenType.LAZY,
 241        TokenType.LEADING,
 242        TokenType.LEFT,
 243        TokenType.LOCAL,
 244        TokenType.MATERIALIZED,
 245        TokenType.MERGE,
 246        TokenType.NATURAL,
 247        TokenType.NEXT,
 248        TokenType.OFFSET,
 249        TokenType.ONLY,
 250        TokenType.OPTIONS,
 251        TokenType.ORDINALITY,
 252        TokenType.OVERWRITE,
 253        TokenType.PARTITION,
 254        TokenType.PERCENT,
 255        TokenType.PIVOT,
 256        TokenType.PRAGMA,
 257        TokenType.PRECEDING,
 258        TokenType.RANGE,
 259        TokenType.REFERENCES,
 260        TokenType.RIGHT,
 261        TokenType.ROW,
 262        TokenType.ROWS,
 263        TokenType.SEED,
 264        TokenType.SEMI,
 265        TokenType.SET,
 266        TokenType.SETTINGS,
 267        TokenType.SHOW,
 268        TokenType.SORTKEY,
 269        TokenType.TEMPORARY,
 270        TokenType.TOP,
 271        TokenType.TRAILING,
 272        TokenType.TRUE,
 273        TokenType.UNBOUNDED,
 274        TokenType.UNIQUE,
 275        TokenType.UNLOGGED,
 276        TokenType.UNPIVOT,
 277        TokenType.VOLATILE,
 278        TokenType.WINDOW,
 279        *CREATABLES,
 280        *SUBQUERY_PREDICATES,
 281        *TYPE_TOKENS,
 282        *NO_PAREN_FUNCTIONS,
 283    }
 284
 285    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 286
 287    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 288        TokenType.APPLY,
 289        TokenType.FULL,
 290        TokenType.LEFT,
 291        TokenType.LOCK,
 292        TokenType.NATURAL,
 293        TokenType.OFFSET,
 294        TokenType.RIGHT,
 295        TokenType.WINDOW,
 296    }
 297
 298    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 299
 300    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 301
 302    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 303
 304    FUNC_TOKENS = {
 305        TokenType.COMMAND,
 306        TokenType.CURRENT_DATE,
 307        TokenType.CURRENT_DATETIME,
 308        TokenType.CURRENT_TIMESTAMP,
 309        TokenType.CURRENT_TIME,
 310        TokenType.CURRENT_USER,
 311        TokenType.FILTER,
 312        TokenType.FIRST,
 313        TokenType.FORMAT,
 314        TokenType.GLOB,
 315        TokenType.IDENTIFIER,
 316        TokenType.INDEX,
 317        TokenType.ISNULL,
 318        TokenType.ILIKE,
 319        TokenType.LIKE,
 320        TokenType.MERGE,
 321        TokenType.OFFSET,
 322        TokenType.PRIMARY_KEY,
 323        TokenType.RANGE,
 324        TokenType.REPLACE,
 325        TokenType.ROW,
 326        TokenType.UNNEST,
 327        TokenType.VAR,
 328        TokenType.LEFT,
 329        TokenType.RIGHT,
 330        TokenType.DATE,
 331        TokenType.DATETIME,
 332        TokenType.TABLE,
 333        TokenType.TIMESTAMP,
 334        TokenType.TIMESTAMPTZ,
 335        TokenType.WINDOW,
 336        *TYPE_TOKENS,
 337        *SUBQUERY_PREDICATES,
 338    }
 339
 340    CONJUNCTION = {
 341        TokenType.AND: exp.And,
 342        TokenType.OR: exp.Or,
 343    }
 344
 345    EQUALITY = {
 346        TokenType.EQ: exp.EQ,
 347        TokenType.NEQ: exp.NEQ,
 348        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 349    }
 350
 351    COMPARISON = {
 352        TokenType.GT: exp.GT,
 353        TokenType.GTE: exp.GTE,
 354        TokenType.LT: exp.LT,
 355        TokenType.LTE: exp.LTE,
 356    }
 357
 358    BITWISE = {
 359        TokenType.AMP: exp.BitwiseAnd,
 360        TokenType.CARET: exp.BitwiseXor,
 361        TokenType.PIPE: exp.BitwiseOr,
 362        TokenType.DPIPE: exp.DPipe,
 363    }
 364
 365    TERM = {
 366        TokenType.DASH: exp.Sub,
 367        TokenType.PLUS: exp.Add,
 368        TokenType.MOD: exp.Mod,
 369        TokenType.COLLATE: exp.Collate,
 370    }
 371
 372    FACTOR = {
 373        TokenType.DIV: exp.IntDiv,
 374        TokenType.LR_ARROW: exp.Distance,
 375        TokenType.SLASH: exp.Div,
 376        TokenType.STAR: exp.Mul,
 377    }
 378
 379    TIMESTAMPS = {
 380        TokenType.TIME,
 381        TokenType.TIMESTAMP,
 382        TokenType.TIMESTAMPTZ,
 383        TokenType.TIMESTAMPLTZ,
 384    }
 385
 386    SET_OPERATIONS = {
 387        TokenType.UNION,
 388        TokenType.INTERSECT,
 389        TokenType.EXCEPT,
 390    }
 391
 392    JOIN_SIDES = {
 393        TokenType.LEFT,
 394        TokenType.RIGHT,
 395        TokenType.FULL,
 396    }
 397
 398    JOIN_KINDS = {
 399        TokenType.INNER,
 400        TokenType.OUTER,
 401        TokenType.CROSS,
 402        TokenType.SEMI,
 403        TokenType.ANTI,
 404    }
 405
 406    LAMBDAS = {
 407        TokenType.ARROW: lambda self, expressions: self.expression(
 408            exp.Lambda,
 409            this=self._replace_lambda(
 410                self._parse_conjunction(),
 411                {node.name for node in expressions},
 412            ),
 413            expressions=expressions,
 414        ),
 415        TokenType.FARROW: lambda self, expressions: self.expression(
 416            exp.Kwarg,
 417            this=exp.Var(this=expressions[0].name),
 418            expression=self._parse_conjunction(),
 419        ),
 420    }
 421
 422    COLUMN_OPERATORS = {
 423        TokenType.DOT: None,
 424        TokenType.DCOLON: lambda self, this, to: self.expression(
 425            exp.Cast if self.STRICT_CAST else exp.TryCast,
 426            this=this,
 427            to=to,
 428        ),
 429        TokenType.ARROW: lambda self, this, path: self.expression(
 430            exp.JSONExtract,
 431            this=this,
 432            expression=path,
 433        ),
 434        TokenType.DARROW: lambda self, this, path: self.expression(
 435            exp.JSONExtractScalar,
 436            this=this,
 437            expression=path,
 438        ),
 439        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 440            exp.JSONBExtract,
 441            this=this,
 442            expression=path,
 443        ),
 444        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 445            exp.JSONBExtractScalar,
 446            this=this,
 447            expression=path,
 448        ),
 449        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 450            exp.JSONBContains,
 451            this=this,
 452            expression=key,
 453        ),
 454    }
 455
 456    EXPRESSION_PARSERS = {
 457        exp.Column: lambda self: self._parse_column(),
 458        exp.DataType: lambda self: self._parse_types(),
 459        exp.From: lambda self: self._parse_from(),
 460        exp.Group: lambda self: self._parse_group(),
 461        exp.Identifier: lambda self: self._parse_id_var(),
 462        exp.Lateral: lambda self: self._parse_lateral(),
 463        exp.Join: lambda self: self._parse_join(),
 464        exp.Order: lambda self: self._parse_order(),
 465        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 466        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 467        exp.Lambda: lambda self: self._parse_lambda(),
 468        exp.Limit: lambda self: self._parse_limit(),
 469        exp.Offset: lambda self: self._parse_offset(),
 470        exp.TableAlias: lambda self: self._parse_table_alias(),
 471        exp.Table: lambda self: self._parse_table(),
 472        exp.Condition: lambda self: self._parse_conjunction(),
 473        exp.Expression: lambda self: self._parse_statement(),
 474        exp.Properties: lambda self: self._parse_properties(),
 475        exp.Where: lambda self: self._parse_where(),
 476        exp.Ordered: lambda self: self._parse_ordered(),
 477        exp.Having: lambda self: self._parse_having(),
 478        exp.With: lambda self: self._parse_with(),
 479        exp.Window: lambda self: self._parse_named_window(),
 480        exp.Qualify: lambda self: self._parse_qualify(),
 481        exp.Returning: lambda self: self._parse_returning(),
 482        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 483    }
 484
 485    STATEMENT_PARSERS = {
 486        TokenType.ALTER: lambda self: self._parse_alter(),
 487        TokenType.BEGIN: lambda self: self._parse_transaction(),
 488        TokenType.CACHE: lambda self: self._parse_cache(),
 489        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 490        TokenType.COMMENT: lambda self: self._parse_comment(),
 491        TokenType.CREATE: lambda self: self._parse_create(),
 492        TokenType.DELETE: lambda self: self._parse_delete(),
 493        TokenType.DESC: lambda self: self._parse_describe(),
 494        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 495        TokenType.DROP: lambda self: self._parse_drop(),
 496        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 497        TokenType.INSERT: lambda self: self._parse_insert(),
 498        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 499        TokenType.MERGE: lambda self: self._parse_merge(),
 500        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 501        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 502        TokenType.SET: lambda self: self._parse_set(),
 503        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 504        TokenType.UPDATE: lambda self: self._parse_update(),
 505        TokenType.USE: lambda self: self.expression(
 506            exp.Use,
 507            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 508            and exp.Var(this=self._prev.text),
 509            this=self._parse_table(schema=False),
 510        ),
 511    }
 512
 513    UNARY_PARSERS = {
 514        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 515        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 516        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 517        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 518    }
 519
 520    PRIMARY_PARSERS = {
 521        TokenType.STRING: lambda self, token: self.expression(
 522            exp.Literal, this=token.text, is_string=True
 523        ),
 524        TokenType.NUMBER: lambda self, token: self.expression(
 525            exp.Literal, this=token.text, is_string=False
 526        ),
 527        TokenType.STAR: lambda self, _: self.expression(
 528            exp.Star,
 529            **{"except": self._parse_except(), "replace": self._parse_replace()},
 530        ),
 531        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 532        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 533        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 534        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 535        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 536        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 537        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 538        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 539        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 540    }
 541
 542    PLACEHOLDER_PARSERS = {
 543        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 544        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 545        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 546        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 547        else None,
 548    }
 549
 550    RANGE_PARSERS = {
 551        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 552        TokenType.GLOB: binary_range_parser(exp.Glob),
 553        TokenType.ILIKE: binary_range_parser(exp.ILike),
 554        TokenType.IN: lambda self, this: self._parse_in(this),
 555        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 556        TokenType.IS: lambda self, this: self._parse_is(this),
 557        TokenType.LIKE: binary_range_parser(exp.Like),
 558        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 559        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 560        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 561    }
 562
 563    PROPERTY_PARSERS = {
 564        "AFTER": lambda self: self._parse_afterjournal(
 565            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 566        ),
 567        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 568        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 569        "BEFORE": lambda self: self._parse_journal(
 570            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 571        ),
 572        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 573        "CHARACTER SET": lambda self: self._parse_character_set(),
 574        "CHECKSUM": lambda self: self._parse_checksum(),
 575        "CLUSTER BY": lambda self: self.expression(
 576            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 577        ),
 578        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 579        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 580        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 581            default=self._prev.text.upper() == "DEFAULT"
 582        ),
 583        "DEFINER": lambda self: self._parse_definer(),
 584        "DETERMINISTIC": lambda self: self.expression(
 585            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 586        ),
 587        "DISTKEY": lambda self: self._parse_distkey(),
 588        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 589        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 590        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 591        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 592        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 593        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 594        "FREESPACE": lambda self: self._parse_freespace(),
 595        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 596        "IMMUTABLE": lambda self: self.expression(
 597            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 598        ),
 599        "JOURNAL": lambda self: self._parse_journal(
 600            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 601        ),
 602        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 603        "LIKE": lambda self: self._parse_create_like(),
 604        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 605        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 606        "LOCK": lambda self: self._parse_locking(),
 607        "LOCKING": lambda self: self._parse_locking(),
 608        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 609        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 610        "MAX": lambda self: self._parse_datablocksize(),
 611        "MAXIMUM": lambda self: self._parse_datablocksize(),
 612        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 613            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 614        ),
 615        "MIN": lambda self: self._parse_datablocksize(),
 616        "MINIMUM": lambda self: self._parse_datablocksize(),
 617        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 618        "NO": lambda self: self._parse_noprimaryindex(),
 619        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 620        "ON": lambda self: self._parse_oncommit(),
 621        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 622        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 623        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 624        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 625        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 626        "RETURNS": lambda self: self._parse_returns(),
 627        "ROW": lambda self: self._parse_row(),
 628        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 629        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 630        "SETTINGS": lambda self: self.expression(
 631            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 632        ),
 633        "SORTKEY": lambda self: self._parse_sortkey(),
 634        "STABLE": lambda self: self.expression(
 635            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 636        ),
 637        "STORED": lambda self: self._parse_stored(),
 638        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 639        "TEMP": lambda self: self._parse_temporary(global_=False),
 640        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 641        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 642        "TTL": lambda self: self._parse_ttl(),
 643        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 644        "VOLATILE": lambda self: self._parse_volatile_property(),
 645        "WITH": lambda self: self._parse_with_property(),
 646    }
 647
 648    CONSTRAINT_PARSERS = {
 649        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 650        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 651        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 652        "CHARACTER SET": lambda self: self.expression(
 653            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 654        ),
 655        "CHECK": lambda self: self.expression(
 656            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 657        ),
 658        "COLLATE": lambda self: self.expression(
 659            exp.CollateColumnConstraint, this=self._parse_var()
 660        ),
 661        "COMMENT": lambda self: self.expression(
 662            exp.CommentColumnConstraint, this=self._parse_string()
 663        ),
 664        "COMPRESS": lambda self: self._parse_compress(),
 665        "DEFAULT": lambda self: self.expression(
 666            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 667        ),
 668        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 669        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 670        "FORMAT": lambda self: self.expression(
 671            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 672        ),
 673        "GENERATED": lambda self: self._parse_generated_as_identity(),
 674        "IDENTITY": lambda self: self._parse_auto_increment(),
 675        "INLINE": lambda self: self._parse_inline(),
 676        "LIKE": lambda self: self._parse_create_like(),
 677        "NOT": lambda self: self._parse_not_constraint(),
 678        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 679        "ON": lambda self: self._match(TokenType.UPDATE)
 680        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 681        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 682        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 683        "REFERENCES": lambda self: self._parse_references(match=False),
 684        "TITLE": lambda self: self.expression(
 685            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 686        ),
 687        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 688        "UNIQUE": lambda self: self._parse_unique(),
 689        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 690    }
 691
 692    ALTER_PARSERS = {
 693        "ADD": lambda self: self._parse_alter_table_add(),
 694        "ALTER": lambda self: self._parse_alter_table_alter(),
 695        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 696        "DROP": lambda self: self._parse_alter_table_drop(),
 697        "RENAME": lambda self: self._parse_alter_table_rename(),
 698    }
 699
 700    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 701
 702    NO_PAREN_FUNCTION_PARSERS = {
 703        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 704        TokenType.CASE: lambda self: self._parse_case(),
 705        TokenType.IF: lambda self: self._parse_if(),
 706        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 707            exp.NextValueFor,
 708            this=self._parse_column(),
 709            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 710        ),
 711    }
 712
 713    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 714        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 715        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 716        "DECODE": lambda self: self._parse_decode(),
 717        "EXTRACT": lambda self: self._parse_extract(),
 718        "JSON_OBJECT": lambda self: self._parse_json_object(),
 719        "LOG": lambda self: self._parse_logarithm(),
 720        "MATCH": lambda self: self._parse_match_against(),
 721        "OPENJSON": lambda self: self._parse_open_json(),
 722        "POSITION": lambda self: self._parse_position(),
 723        "STRING_AGG": lambda self: self._parse_string_agg(),
 724        "SUBSTRING": lambda self: self._parse_substring(),
 725        "TRIM": lambda self: self._parse_trim(),
 726        "TRY_CAST": lambda self: self._parse_cast(False),
 727        "TRY_CONVERT": lambda self: self._parse_convert(False),
 728    }
 729
 730    QUERY_MODIFIER_PARSERS = {
 731        "joins": lambda self: list(iter(self._parse_join, None)),
 732        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 733        "match": lambda self: self._parse_match_recognize(),
 734        "where": lambda self: self._parse_where(),
 735        "group": lambda self: self._parse_group(),
 736        "having": lambda self: self._parse_having(),
 737        "qualify": lambda self: self._parse_qualify(),
 738        "windows": lambda self: self._parse_window_clause(),
 739        "order": lambda self: self._parse_order(),
 740        "limit": lambda self: self._parse_limit(),
 741        "offset": lambda self: self._parse_offset(),
 742        "locks": lambda self: self._parse_locks(),
 743        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 744    }
 745
 746    SET_PARSERS = {
 747        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 748        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 749        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 750        "TRANSACTION": lambda self: self._parse_set_transaction(),
 751    }
 752
 753    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 754
 755    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 756
 757    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 758
 759    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 760
 761    TRANSACTION_CHARACTERISTICS = {
 762        "ISOLATION LEVEL REPEATABLE READ",
 763        "ISOLATION LEVEL READ COMMITTED",
 764        "ISOLATION LEVEL READ UNCOMMITTED",
 765        "ISOLATION LEVEL SERIALIZABLE",
 766        "READ WRITE",
 767        "READ ONLY",
 768    }
 769
 770    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 771
 772    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 773
 774    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 775    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 776
 777    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 778
 779    STRICT_CAST = True
 780
 781    CONVERT_TYPE_FIRST = False
 782
 783    PREFIXED_PIVOT_COLUMNS = False
 784    IDENTIFY_PIVOT_STRINGS = False
 785
 786    LOG_BASE_FIRST = True
 787    LOG_DEFAULTS_TO_LN = False
 788
 789    __slots__ = (
 790        "error_level",
 791        "error_message_context",
 792        "sql",
 793        "errors",
 794        "index_offset",
 795        "unnest_column_only",
 796        "alias_post_tablesample",
 797        "max_errors",
 798        "null_ordering",
 799        "_tokens",
 800        "_index",
 801        "_curr",
 802        "_next",
 803        "_prev",
 804        "_prev_comments",
 805        "_show_trie",
 806        "_set_trie",
 807    )
 808
 809    def __init__(
 810        self,
 811        error_level: t.Optional[ErrorLevel] = None,
 812        error_message_context: int = 100,
 813        index_offset: int = 0,
 814        unnest_column_only: bool = False,
 815        alias_post_tablesample: bool = False,
 816        max_errors: int = 3,
 817        null_ordering: t.Optional[str] = None,
 818    ):
 819        self.error_level = error_level or ErrorLevel.IMMEDIATE
 820        self.error_message_context = error_message_context
 821        self.index_offset = index_offset
 822        self.unnest_column_only = unnest_column_only
 823        self.alias_post_tablesample = alias_post_tablesample
 824        self.max_errors = max_errors
 825        self.null_ordering = null_ordering
 826        self.reset()
 827
 828    def reset(self):
 829        self.sql = ""
 830        self.errors = []
 831        self._tokens = []
 832        self._index = 0
 833        self._curr = None
 834        self._next = None
 835        self._prev = None
 836        self._prev_comments = None
 837
 838    def parse(
 839        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 840    ) -> t.List[t.Optional[exp.Expression]]:
 841        """
 842        Parses a list of tokens and returns a list of syntax trees, one tree
 843        per parsed SQL statement.
 844
 845        Args:
 846            raw_tokens: the list of tokens.
 847            sql: the original SQL string, used to produce helpful debug messages.
 848
 849        Returns:
 850            The list of syntax trees.
 851        """
 852        return self._parse(
 853            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 854        )
 855
 856    def parse_into(
 857        self,
 858        expression_types: exp.IntoType,
 859        raw_tokens: t.List[Token],
 860        sql: t.Optional[str] = None,
 861    ) -> t.List[t.Optional[exp.Expression]]:
 862        """
 863        Parses a list of tokens into a given Expression type. If a collection of Expression
 864        types is given instead, this method will try to parse the token list into each one
 865        of them, stopping at the first for which the parsing succeeds.
 866
 867        Args:
 868            expression_types: the expression type(s) to try and parse the token list into.
 869            raw_tokens: the list of tokens.
 870            sql: the original SQL string, used to produce helpful debug messages.
 871
 872        Returns:
 873            The target Expression.
 874        """
 875        errors = []
 876        for expression_type in ensure_collection(expression_types):
 877            parser = self.EXPRESSION_PARSERS.get(expression_type)
 878            if not parser:
 879                raise TypeError(f"No parser registered for {expression_type}")
 880            try:
 881                return self._parse(parser, raw_tokens, sql)
 882            except ParseError as e:
 883                e.errors[0]["into_expression"] = expression_type
 884                errors.append(e)
 885        raise ParseError(
 886            f"Failed to parse into {expression_types}",
 887            errors=merge_errors(errors),
 888        ) from errors[-1]
 889
 890    def _parse(
 891        self,
 892        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 893        raw_tokens: t.List[Token],
 894        sql: t.Optional[str] = None,
 895    ) -> t.List[t.Optional[exp.Expression]]:
 896        self.reset()
 897        self.sql = sql or ""
 898        total = len(raw_tokens)
 899        chunks: t.List[t.List[Token]] = [[]]
 900
 901        for i, token in enumerate(raw_tokens):
 902            if token.token_type == TokenType.SEMICOLON:
 903                if i < total - 1:
 904                    chunks.append([])
 905            else:
 906                chunks[-1].append(token)
 907
 908        expressions = []
 909
 910        for tokens in chunks:
 911            self._index = -1
 912            self._tokens = tokens
 913            self._advance()
 914
 915            expressions.append(parse_method(self))
 916
 917            if self._index < len(self._tokens):
 918                self.raise_error("Invalid expression / Unexpected token")
 919
 920            self.check_errors()
 921
 922        return expressions
 923
 924    def check_errors(self) -> None:
 925        """
 926        Logs or raises any found errors, depending on the chosen error level setting.
 927        """
 928        if self.error_level == ErrorLevel.WARN:
 929            for error in self.errors:
 930                logger.error(str(error))
 931        elif self.error_level == ErrorLevel.RAISE and self.errors:
 932            raise ParseError(
 933                concat_messages(self.errors, self.max_errors),
 934                errors=merge_errors(self.errors),
 935            )
 936
 937    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 938        """
 939        Appends an error in the list of recorded errors or raises it, depending on the chosen
 940        error level setting.
 941        """
 942        token = token or self._curr or self._prev or Token.string("")
 943        start = token.start
 944        end = token.end
 945        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 946        highlight = self.sql[start:end]
 947        end_context = self.sql[end : end + self.error_message_context]
 948
 949        error = ParseError.new(
 950            f"{message}. Line {token.line}, Col: {token.col}.\n"
 951            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 952            description=message,
 953            line=token.line,
 954            col=token.col,
 955            start_context=start_context,
 956            highlight=highlight,
 957            end_context=end_context,
 958        )
 959
 960        if self.error_level == ErrorLevel.IMMEDIATE:
 961            raise error
 962
 963        self.errors.append(error)
 964
 965    def expression(
 966        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 967    ) -> E:
 968        """
 969        Creates a new, validated Expression.
 970
 971        Args:
 972            exp_class: the expression class to instantiate.
 973            comments: an optional list of comments to attach to the expression.
 974            kwargs: the arguments to set for the expression along with their respective values.
 975
 976        Returns:
 977            The target expression.
 978        """
 979        instance = exp_class(**kwargs)
 980        instance.add_comments(comments) if comments else self._add_comments(instance)
 981        self.validate_expression(instance)
 982        return instance
 983
 984    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 985        if expression and self._prev_comments:
 986            expression.add_comments(self._prev_comments)
 987            self._prev_comments = None
 988
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)
1005
1006    def _find_sql(self, start: Token, end: Token) -> str:
1007        return self.sql[start.start : end.end]
1008
1009    def _advance(self, times: int = 1) -> None:
1010        self._index += times
1011        self._curr = seq_get(self._tokens, self._index)
1012        self._next = seq_get(self._tokens, self._index + 1)
1013        if self._index > 0:
1014            self._prev = self._tokens[self._index - 1]
1015            self._prev_comments = self._prev.comments
1016        else:
1017            self._prev = None
1018            self._prev_comments = None
1019
1020    def _retreat(self, index: int) -> None:
1021        if index != self._index:
1022            self._advance(index - self._index)
1023
1024    def _parse_command(self) -> exp.Command:
1025        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1026
1027    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1028        start = self._prev
1029        exists = self._parse_exists() if allow_exists else None
1030
1031        self._match(TokenType.ON)
1032
1033        kind = self._match_set(self.CREATABLES) and self._prev
1034
1035        if not kind:
1036            return self._parse_as_command(start)
1037
1038        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1039            this = self._parse_user_defined_function(kind=kind.token_type)
1040        elif kind.token_type == TokenType.TABLE:
1041            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1042        elif kind.token_type == TokenType.COLUMN:
1043            this = self._parse_column()
1044        else:
1045            this = self._parse_id_var()
1046
1047        self._match(TokenType.IS)
1048
1049        return self.expression(
1050            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1051        )
1052
1053    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1054    def _parse_ttl(self) -> exp.Expression:
1055        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1056            this = self._parse_bitwise()
1057
1058            if self._match_text_seq("DELETE"):
1059                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1060            if self._match_text_seq("RECOMPRESS"):
1061                return self.expression(
1062                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1063                )
1064            if self._match_text_seq("TO", "DISK"):
1065                return self.expression(
1066                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1067                )
1068            if self._match_text_seq("TO", "VOLUME"):
1069                return self.expression(
1070                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1071                )
1072
1073            return this
1074
1075        expressions = self._parse_csv(_parse_ttl_action)
1076        where = self._parse_where()
1077        group = self._parse_group()
1078
1079        aggregates = None
1080        if group and self._match(TokenType.SET):
1081            aggregates = self._parse_csv(self._parse_set_item)
1082
1083        return self.expression(
1084            exp.MergeTreeTTL,
1085            expressions=expressions,
1086            where=where,
1087            group=group,
1088            aggregates=aggregates,
1089        )
1090
1091    def _parse_statement(self) -> t.Optional[exp.Expression]:
1092        if self._curr is None:
1093            return None
1094
1095        if self._match_set(self.STATEMENT_PARSERS):
1096            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1097
1098        if self._match_set(Tokenizer.COMMANDS):
1099            return self._parse_command()
1100
1101        expression = self._parse_expression()
1102        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1103        return self._parse_query_modifiers(expression)
1104
1105    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1106        start = self._prev
1107        temporary = self._match(TokenType.TEMPORARY)
1108        materialized = self._match(TokenType.MATERIALIZED)
1109        kind = self._match_set(self.CREATABLES) and self._prev.text
1110        if not kind:
1111            return self._parse_as_command(start)
1112
1113        return self.expression(
1114            exp.Drop,
1115            exists=self._parse_exists(),
1116            this=self._parse_table(schema=True),
1117            kind=kind,
1118            temporary=temporary,
1119            materialized=materialized,
1120            cascade=self._match(TokenType.CASCADE),
1121            constraints=self._match_text_seq("CONSTRAINTS"),
1122            purge=self._match_text_seq("PURGE"),
1123        )
1124
1125    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1126        return (
1127            self._match(TokenType.IF)
1128            and (not not_ or self._match(TokenType.NOT))
1129            and self._match(TokenType.EXISTS)
1130        )
1131
1132    def _parse_create(self) -> t.Optional[exp.Expression]:
1133        start = self._prev
1134        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1135            TokenType.OR, TokenType.REPLACE
1136        )
1137        unique = self._match(TokenType.UNIQUE)
1138
1139        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1140            self._match(TokenType.TABLE)
1141
1142        properties = None
1143        create_token = self._match_set(self.CREATABLES) and self._prev
1144
1145        if not create_token:
1146            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1147            create_token = self._match_set(self.CREATABLES) and self._prev
1148
1149            if not properties or not create_token:
1150                return self._parse_as_command(start)
1151
1152        exists = self._parse_exists(not_=True)
1153        this = None
1154        expression = None
1155        indexes = None
1156        no_schema_binding = None
1157        begin = None
1158        clone = None
1159
1160        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1161            this = self._parse_user_defined_function(kind=create_token.token_type)
1162            temp_properties = self._parse_properties()
1163            if properties and temp_properties:
1164                properties.expressions.extend(temp_properties.expressions)
1165            elif temp_properties:
1166                properties = temp_properties
1167
1168            self._match(TokenType.ALIAS)
1169            begin = self._match(TokenType.BEGIN)
1170            return_ = self._match_text_seq("RETURN")
1171            expression = self._parse_statement()
1172
1173            if return_:
1174                expression = self.expression(exp.Return, this=expression)
1175        elif create_token.token_type == TokenType.INDEX:
1176            this = self._parse_index()
1177        elif create_token.token_type in self.DB_CREATABLES:
1178            table_parts = self._parse_table_parts(schema=True)
1179
1180            # exp.Properties.Location.POST_NAME
1181            if self._match(TokenType.COMMA):
1182                temp_properties = self._parse_properties(before=True)
1183                if properties and temp_properties:
1184                    properties.expressions.extend(temp_properties.expressions)
1185                elif temp_properties:
1186                    properties = temp_properties
1187
1188            this = self._parse_schema(this=table_parts)
1189
1190            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1191            temp_properties = self._parse_properties()
1192            if properties and temp_properties:
1193                properties.expressions.extend(temp_properties.expressions)
1194            elif temp_properties:
1195                properties = temp_properties
1196
1197            self._match(TokenType.ALIAS)
1198
1199            # exp.Properties.Location.POST_ALIAS
1200            if not (
1201                self._match(TokenType.SELECT, advance=False)
1202                or self._match(TokenType.WITH, advance=False)
1203                or self._match(TokenType.L_PAREN, advance=False)
1204            ):
1205                temp_properties = self._parse_properties()
1206                if properties and temp_properties:
1207                    properties.expressions.extend(temp_properties.expressions)
1208                elif temp_properties:
1209                    properties = temp_properties
1210
1211            expression = self._parse_ddl_select()
1212
1213            if create_token.token_type == TokenType.TABLE:
1214                # exp.Properties.Location.POST_EXPRESSION
1215                temp_properties = self._parse_properties()
1216                if properties and temp_properties:
1217                    properties.expressions.extend(temp_properties.expressions)
1218                elif temp_properties:
1219                    properties = temp_properties
1220
1221                indexes = []
1222                while True:
1223                    index = self._parse_create_table_index()
1224
1225                    # exp.Properties.Location.POST_INDEX
1226                    if self._match(TokenType.PARTITION_BY, advance=False):
1227                        temp_properties = self._parse_properties()
1228                        if properties and temp_properties:
1229                            properties.expressions.extend(temp_properties.expressions)
1230                        elif temp_properties:
1231                            properties = temp_properties
1232
1233                    if not index:
1234                        break
1235                    else:
1236                        indexes.append(index)
1237            elif create_token.token_type == TokenType.VIEW:
1238                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1239                    no_schema_binding = True
1240
1241            if self._match_text_seq("CLONE"):
1242                clone = self._parse_table(schema=True)
1243                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1244                clone_kind = (
1245                    self._match(TokenType.L_PAREN)
1246                    and self._match_texts(self.CLONE_KINDS)
1247                    and self._prev.text.upper()
1248                )
1249                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1250                self._match(TokenType.R_PAREN)
1251                clone = self.expression(
1252                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1253                )
1254
1255        return self.expression(
1256            exp.Create,
1257            this=this,
1258            kind=create_token.text,
1259            replace=replace,
1260            unique=unique,
1261            expression=expression,
1262            exists=exists,
1263            properties=properties,
1264            indexes=indexes,
1265            no_schema_binding=no_schema_binding,
1266            begin=begin,
1267            clone=clone,
1268        )
1269
1270    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1271        self._match(TokenType.COMMA)
1272
1273        # parsers look to _prev for no/dual/default, so need to consume first
1274        self._match_text_seq("NO")
1275        self._match_text_seq("DUAL")
1276        self._match_text_seq("DEFAULT")
1277
1278        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1279            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1280
1281        return None
1282
1283    def _parse_property(self) -> t.Optional[exp.Expression]:
1284        if self._match_texts(self.PROPERTY_PARSERS):
1285            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1286
1287        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1288            return self._parse_character_set(default=True)
1289
1290        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1291            return self._parse_sortkey(compound=True)
1292
1293        if self._match_text_seq("SQL", "SECURITY"):
1294            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1295
1296        assignment = self._match_pair(
1297            TokenType.VAR, TokenType.EQ, advance=False
1298        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1299
1300        if assignment:
1301            key = self._parse_var_or_string()
1302            self._match(TokenType.EQ)
1303            return self.expression(exp.Property, this=key, value=self._parse_column())
1304
1305        return None
1306
1307    def _parse_stored(self) -> exp.Expression:
1308        self._match(TokenType.ALIAS)
1309
1310        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1311        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1312
1313        return self.expression(
1314            exp.FileFormatProperty,
1315            this=self.expression(
1316                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1317            )
1318            if input_format or output_format
1319            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1320        )
1321
1322    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1323        self._match(TokenType.EQ)
1324        self._match(TokenType.ALIAS)
1325        return self.expression(exp_class, this=self._parse_field())
1326
1327    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1328        properties = []
1329
1330        while True:
1331            if before:
1332                identified_property = self._parse_property_before()
1333            else:
1334                identified_property = self._parse_property()
1335
1336            if not identified_property:
1337                break
1338            for p in ensure_list(identified_property):
1339                properties.append(p)
1340
1341        if properties:
1342            return self.expression(exp.Properties, expressions=properties)
1343
1344        return None
1345
1346    def _parse_fallback(self, no=False) -> exp.Expression:
1347        self._match_text_seq("FALLBACK")
1348        return self.expression(
1349            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1350        )
1351
1352    def _parse_volatile_property(self) -> exp.Expression:
1353        if self._index >= 2:
1354            pre_volatile_token = self._tokens[self._index - 2]
1355        else:
1356            pre_volatile_token = None
1357
1358        if pre_volatile_token and pre_volatile_token.token_type in (
1359            TokenType.CREATE,
1360            TokenType.REPLACE,
1361            TokenType.UNIQUE,
1362        ):
1363            return exp.VolatileProperty()
1364
1365        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1366
1367    def _parse_with_property(
1368        self,
1369    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1370        self._match(TokenType.WITH)
1371        if self._match(TokenType.L_PAREN, advance=False):
1372            return self._parse_wrapped_csv(self._parse_property)
1373
1374        if self._match_text_seq("JOURNAL"):
1375            return self._parse_withjournaltable()
1376
1377        if self._match_text_seq("DATA"):
1378            return self._parse_withdata(no=False)
1379        elif self._match_text_seq("NO", "DATA"):
1380            return self._parse_withdata(no=True)
1381
1382        if not self._next:
1383            return None
1384
1385        return self._parse_withisolatedloading()
1386
1387    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1388    def _parse_definer(self) -> t.Optional[exp.Expression]:
1389        self._match(TokenType.EQ)
1390
1391        user = self._parse_id_var()
1392        self._match(TokenType.PARAMETER)
1393        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1394
1395        if not user or not host:
1396            return None
1397
1398        return exp.DefinerProperty(this=f"{user}@{host}")
1399
1400    def _parse_withjournaltable(self) -> exp.Expression:
1401        self._match(TokenType.TABLE)
1402        self._match(TokenType.EQ)
1403        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1404
1405    def _parse_log(self, no=False) -> exp.Expression:
1406        self._match_text_seq("LOG")
1407        return self.expression(exp.LogProperty, no=no)
1408
1409    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1410        before = self._match_text_seq("BEFORE")
1411        self._match_text_seq("JOURNAL")
1412        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1413
1414    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1415        self._match_text_seq("NOT")
1416        self._match_text_seq("LOCAL")
1417        self._match_text_seq("AFTER", "JOURNAL")
1418        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1419
1420    def _parse_checksum(self) -> exp.Expression:
1421        self._match_text_seq("CHECKSUM")
1422        self._match(TokenType.EQ)
1423
1424        on = None
1425        if self._match(TokenType.ON):
1426            on = True
1427        elif self._match_text_seq("OFF"):
1428            on = False
1429        default = self._match(TokenType.DEFAULT)
1430
1431        return self.expression(
1432            exp.ChecksumProperty,
1433            on=on,
1434            default=default,
1435        )
1436
1437    def _parse_freespace(self) -> exp.Expression:
1438        self._match_text_seq("FREESPACE")
1439        self._match(TokenType.EQ)
1440        return self.expression(
1441            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1442        )
1443
1444    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1445        self._match_text_seq("MERGEBLOCKRATIO")
1446        if self._match(TokenType.EQ):
1447            return self.expression(
1448                exp.MergeBlockRatioProperty,
1449                this=self._parse_number(),
1450                percent=self._match(TokenType.PERCENT),
1451            )
1452        else:
1453            return self.expression(
1454                exp.MergeBlockRatioProperty,
1455                no=no,
1456                default=default,
1457            )
1458
1459    def _parse_datablocksize(self, default=None) -> exp.Expression:
1460        if default:
1461            self._match_text_seq("DATABLOCKSIZE")
1462            return self.expression(exp.DataBlocksizeProperty, default=True)
1463        elif self._match_texts(("MIN", "MINIMUM")):
1464            self._match_text_seq("DATABLOCKSIZE")
1465            return self.expression(exp.DataBlocksizeProperty, min=True)
1466        elif self._match_texts(("MAX", "MAXIMUM")):
1467            self._match_text_seq("DATABLOCKSIZE")
1468            return self.expression(exp.DataBlocksizeProperty, min=False)
1469
1470        self._match_text_seq("DATABLOCKSIZE")
1471        self._match(TokenType.EQ)
1472        size = self._parse_number()
1473        units = None
1474        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1475            units = self._prev.text
1476        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1477
1478    def _parse_blockcompression(self) -> exp.Expression:
1479        self._match_text_seq("BLOCKCOMPRESSION")
1480        self._match(TokenType.EQ)
1481        always = self._match_text_seq("ALWAYS")
1482        manual = self._match_text_seq("MANUAL")
1483        never = self._match_text_seq("NEVER")
1484        default = self._match_text_seq("DEFAULT")
1485        autotemp = None
1486        if self._match_text_seq("AUTOTEMP"):
1487            autotemp = self._parse_schema()
1488
1489        return self.expression(
1490            exp.BlockCompressionProperty,
1491            always=always,
1492            manual=manual,
1493            never=never,
1494            default=default,
1495            autotemp=autotemp,
1496        )
1497
1498    def _parse_withisolatedloading(self) -> exp.Expression:
1499        no = self._match_text_seq("NO")
1500        concurrent = self._match_text_seq("CONCURRENT")
1501        self._match_text_seq("ISOLATED", "LOADING")
1502        for_all = self._match_text_seq("FOR", "ALL")
1503        for_insert = self._match_text_seq("FOR", "INSERT")
1504        for_none = self._match_text_seq("FOR", "NONE")
1505        return self.expression(
1506            exp.IsolatedLoadingProperty,
1507            no=no,
1508            concurrent=concurrent,
1509            for_all=for_all,
1510            for_insert=for_insert,
1511            for_none=for_none,
1512        )
1513
1514    def _parse_locking(self) -> exp.Expression:
1515        if self._match(TokenType.TABLE):
1516            kind = "TABLE"
1517        elif self._match(TokenType.VIEW):
1518            kind = "VIEW"
1519        elif self._match(TokenType.ROW):
1520            kind = "ROW"
1521        elif self._match_text_seq("DATABASE"):
1522            kind = "DATABASE"
1523        else:
1524            kind = None
1525
1526        if kind in ("DATABASE", "TABLE", "VIEW"):
1527            this = self._parse_table_parts()
1528        else:
1529            this = None
1530
1531        if self._match(TokenType.FOR):
1532            for_or_in = "FOR"
1533        elif self._match(TokenType.IN):
1534            for_or_in = "IN"
1535        else:
1536            for_or_in = None
1537
1538        if self._match_text_seq("ACCESS"):
1539            lock_type = "ACCESS"
1540        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1541            lock_type = "EXCLUSIVE"
1542        elif self._match_text_seq("SHARE"):
1543            lock_type = "SHARE"
1544        elif self._match_text_seq("READ"):
1545            lock_type = "READ"
1546        elif self._match_text_seq("WRITE"):
1547            lock_type = "WRITE"
1548        elif self._match_text_seq("CHECKSUM"):
1549            lock_type = "CHECKSUM"
1550        else:
1551            lock_type = None
1552
1553        override = self._match_text_seq("OVERRIDE")
1554
1555        return self.expression(
1556            exp.LockingProperty,
1557            this=this,
1558            kind=kind,
1559            for_or_in=for_or_in,
1560            lock_type=lock_type,
1561            override=override,
1562        )
1563
1564    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1565        if self._match(TokenType.PARTITION_BY):
1566            return self._parse_csv(self._parse_conjunction)
1567        return []
1568
1569    def _parse_partitioned_by(self) -> exp.Expression:
1570        self._match(TokenType.EQ)
1571        return self.expression(
1572            exp.PartitionedByProperty,
1573            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1574        )
1575
1576    def _parse_withdata(self, no=False) -> exp.Expression:
1577        if self._match_text_seq("AND", "STATISTICS"):
1578            statistics = True
1579        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1580            statistics = False
1581        else:
1582            statistics = None
1583
1584        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1585
1586    def _parse_noprimaryindex(self) -> exp.Expression:
1587        self._match_text_seq("PRIMARY", "INDEX")
1588        return exp.NoPrimaryIndexProperty()
1589
1590    def _parse_oncommit(self) -> exp.Expression:
1591        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1592        return exp.OnCommitProperty()
1593
1594    def _parse_distkey(self) -> exp.Expression:
1595        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1596
1597    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1598        table = self._parse_table(schema=True)
1599        options = []
1600        while self._match_texts(("INCLUDING", "EXCLUDING")):
1601            this = self._prev.text.upper()
1602            id_var = self._parse_id_var()
1603
1604            if not id_var:
1605                return None
1606
1607            options.append(
1608                self.expression(
1609                    exp.Property,
1610                    this=this,
1611                    value=exp.Var(this=id_var.this.upper()),
1612                )
1613            )
1614        return self.expression(exp.LikeProperty, this=table, expressions=options)
1615
1616    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1617        return self.expression(
1618            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1619        )
1620
1621    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1622        self._match(TokenType.EQ)
1623        return self.expression(
1624            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1625        )
1626
1627    def _parse_returns(self) -> exp.Expression:
1628        value: t.Optional[exp.Expression]
1629        is_table = self._match(TokenType.TABLE)
1630
1631        if is_table:
1632            if self._match(TokenType.LT):
1633                value = self.expression(
1634                    exp.Schema,
1635                    this="TABLE",
1636                    expressions=self._parse_csv(self._parse_struct_types),
1637                )
1638                if not self._match(TokenType.GT):
1639                    self.raise_error("Expecting >")
1640            else:
1641                value = self._parse_schema(exp.Var(this="TABLE"))
1642        else:
1643            value = self._parse_types()
1644
1645        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1646
1647    def _parse_temporary(self, global_=False) -> exp.Expression:
1648        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1649        return self.expression(exp.TemporaryProperty, global_=global_)
1650
1651    def _parse_describe(self) -> exp.Expression:
1652        kind = self._match_set(self.CREATABLES) and self._prev.text
1653        this = self._parse_table()
1654
1655        return self.expression(exp.Describe, this=this, kind=kind)
1656
1657    def _parse_insert(self) -> exp.Expression:
1658        overwrite = self._match(TokenType.OVERWRITE)
1659        local = self._match(TokenType.LOCAL)
1660        alternative = None
1661
1662        if self._match_text_seq("DIRECTORY"):
1663            this: t.Optional[exp.Expression] = self.expression(
1664                exp.Directory,
1665                this=self._parse_var_or_string(),
1666                local=local,
1667                row_format=self._parse_row_format(match_row=True),
1668            )
1669        else:
1670            if self._match(TokenType.OR):
1671                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1672
1673            self._match(TokenType.INTO)
1674            self._match(TokenType.TABLE)
1675            this = self._parse_table(schema=True)
1676
1677        return self.expression(
1678            exp.Insert,
1679            this=this,
1680            exists=self._parse_exists(),
1681            partition=self._parse_partition(),
1682            expression=self._parse_ddl_select(),
1683            conflict=self._parse_on_conflict(),
1684            returning=self._parse_returning(),
1685            overwrite=overwrite,
1686            alternative=alternative,
1687        )
1688
1689    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
1690        conflict = self._match_text_seq("ON", "CONFLICT")
1691        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1692
1693        if not (conflict or duplicate):
1694            return None
1695
1696        nothing = None
1697        expressions = None
1698        key = None
1699        constraint = None
1700
1701        if conflict:
1702            if self._match_text_seq("ON", "CONSTRAINT"):
1703                constraint = self._parse_id_var()
1704            else:
1705                key = self._parse_csv(self._parse_value)
1706
1707        self._match_text_seq("DO")
1708        if self._match_text_seq("NOTHING"):
1709            nothing = True
1710        else:
1711            self._match(TokenType.UPDATE)
1712            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1713
1714        return self.expression(
1715            exp.OnConflict,
1716            duplicate=duplicate,
1717            expressions=expressions,
1718            nothing=nothing,
1719            key=key,
1720            constraint=constraint,
1721        )
1722
1723    def _parse_returning(self) -> t.Optional[exp.Expression]:
1724        if not self._match(TokenType.RETURNING):
1725            return None
1726
1727        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1728
1729    def _parse_row(self) -> t.Optional[exp.Expression]:
1730        if not self._match(TokenType.FORMAT):
1731            return None
1732        return self._parse_row_format()
1733
1734    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1735        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1736            return None
1737
1738        if self._match_text_seq("SERDE"):
1739            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1740
1741        self._match_text_seq("DELIMITED")
1742
1743        kwargs = {}
1744
1745        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1746            kwargs["fields"] = self._parse_string()
1747            if self._match_text_seq("ESCAPED", "BY"):
1748                kwargs["escaped"] = self._parse_string()
1749        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1750            kwargs["collection_items"] = self._parse_string()
1751        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1752            kwargs["map_keys"] = self._parse_string()
1753        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1754            kwargs["lines"] = self._parse_string()
1755        if self._match_text_seq("NULL", "DEFINED", "AS"):
1756            kwargs["null"] = self._parse_string()
1757
1758        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1759
1760    def _parse_load_data(self) -> exp.Expression:
1761        local = self._match(TokenType.LOCAL)
1762        self._match_text_seq("INPATH")
1763        inpath = self._parse_string()
1764        overwrite = self._match(TokenType.OVERWRITE)
1765        self._match_pair(TokenType.INTO, TokenType.TABLE)
1766
1767        return self.expression(
1768            exp.LoadData,
1769            this=self._parse_table(schema=True),
1770            local=local,
1771            overwrite=overwrite,
1772            inpath=inpath,
1773            partition=self._parse_partition(),
1774            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1775            serde=self._match_text_seq("SERDE") and self._parse_string(),
1776        )
1777
1778    def _parse_delete(self) -> exp.Expression:
1779        self._match(TokenType.FROM)
1780
1781        return self.expression(
1782            exp.Delete,
1783            this=self._parse_table(),
1784            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1785            where=self._parse_where(),
1786            returning=self._parse_returning(),
1787        )
1788
1789    def _parse_update(self) -> exp.Expression:
1790        return self.expression(
1791            exp.Update,
1792            **{  # type: ignore
1793                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1794                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1795                "from": self._parse_from(modifiers=True),
1796                "where": self._parse_where(),
1797                "returning": self._parse_returning(),
1798            },
1799        )
1800
1801    def _parse_uncache(self) -> exp.Expression:
1802        if not self._match(TokenType.TABLE):
1803            self.raise_error("Expecting TABLE after UNCACHE")
1804
1805        return self.expression(
1806            exp.Uncache,
1807            exists=self._parse_exists(),
1808            this=self._parse_table(schema=True),
1809        )
1810
1811    def _parse_cache(self) -> exp.Expression:
1812        lazy = self._match(TokenType.LAZY)
1813        self._match(TokenType.TABLE)
1814        table = self._parse_table(schema=True)
1815        options = []
1816
1817        if self._match(TokenType.OPTIONS):
1818            self._match_l_paren()
1819            k = self._parse_string()
1820            self._match(TokenType.EQ)
1821            v = self._parse_string()
1822            options = [k, v]
1823            self._match_r_paren()
1824
1825        self._match(TokenType.ALIAS)
1826        return self.expression(
1827            exp.Cache,
1828            this=table,
1829            lazy=lazy,
1830            options=options,
1831            expression=self._parse_select(nested=True),
1832        )
1833
1834    def _parse_partition(self) -> t.Optional[exp.Expression]:
1835        if not self._match(TokenType.PARTITION):
1836            return None
1837
1838        return self.expression(
1839            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1840        )
1841
1842    def _parse_value(self) -> exp.Expression:
1843        if self._match(TokenType.L_PAREN):
1844            expressions = self._parse_csv(self._parse_conjunction)
1845            self._match_r_paren()
1846            return self.expression(exp.Tuple, expressions=expressions)
1847
1848        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1849        # Source: https://prestodb.io/docs/current/sql/values.html
1850        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1851
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a SELECT, a
        parenthesized subquery (when `nested` or `table`), or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: parse a table instead of a select inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression with trailing set operations attached, or None.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level doesn't raise

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT may carry an ON (...) list
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1931
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        Args:
            skip_with_token: assume the WITH token was already consumed.

        Returns:
            An exp.With node, or None when there is no WITH clause.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Continue on a comma; also tolerate a repeated WITH between CTEs.
            # Note the short-circuit: when COMMA matches, the else branch consumes
            # an (optional) stray WITH that follows it.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1951
1952    def _parse_cte(self) -> exp.Expression:
1953        alias = self._parse_table_alias()
1954        if not alias or not alias.this:
1955            self.raise_error("Expected CTE to have alias")
1956
1957        self._match(TokenType.ALIAS)
1958
1959        return self.expression(
1960            exp.CTE,
1961            this=self._parse_wrapped(self._parse_statement),
1962            alias=alias,
1963        )
1964
1965    def _parse_table_alias(
1966        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1967    ) -> t.Optional[exp.Expression]:
1968        any_token = self._match(TokenType.ALIAS)
1969        alias = (
1970            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1971            or self._parse_string_as_identifier()
1972        )
1973
1974        index = self._index
1975        if self._match(TokenType.L_PAREN):
1976            columns = self._parse_csv(self._parse_function_parameter)
1977            self._match_r_paren() if columns else self._retreat(index)
1978        else:
1979            columns = None
1980
1981        if not alias and not columns:
1982            return None
1983
1984        return self.expression(exp.TableAlias, this=alias, columns=columns)
1985
1986    def _parse_subquery(
1987        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1988    ) -> exp.Expression:
1989        return self.expression(
1990            exp.Subquery,
1991            this=this,
1992            pivots=self._parse_pivots(),
1993            alias=self._parse_table_alias() if parse_alias else None,
1994        )
1995
1996    def _parse_query_modifiers(
1997        self, this: t.Optional[exp.Expression]
1998    ) -> t.Optional[exp.Expression]:
1999        if isinstance(this, self.MODIFIABLES):
2000            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2001                expression = parser(self)
2002
2003                if expression:
2004                    this.set(key, expression)
2005        return this
2006
2007    def _parse_hint(self) -> t.Optional[exp.Expression]:
2008        if self._match(TokenType.HINT):
2009            hints = self._parse_csv(self._parse_function)
2010            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2011                self.raise_error("Expected */ after HINT")
2012            return self.expression(exp.Hint, expressions=hints)
2013
2014        return None
2015
2016    def _parse_into(self) -> t.Optional[exp.Expression]:
2017        if not self._match(TokenType.INTO):
2018            return None
2019
2020        temp = self._match(TokenType.TEMPORARY)
2021        unlogged = self._match(TokenType.UNLOGGED)
2022        self._match(TokenType.TABLE)
2023
2024        return self.expression(
2025            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2026        )
2027
2028    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2029        if not self._match(TokenType.FROM):
2030            return None
2031
2032        comments = self._prev_comments
2033        this = self._parse_table()
2034
2035        return self.expression(
2036            exp.From,
2037            comments=comments,
2038            this=self._parse_query_modifiers(this) if modifiers else this,
2039        )
2040
2041    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2042        if not self._match(TokenType.MATCH_RECOGNIZE):
2043            return None
2044
2045        self._match_l_paren()
2046
2047        partition = self._parse_partition_by()
2048        order = self._parse_order()
2049        measures = (
2050            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2051        )
2052
2053        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2054            rows = exp.Var(this="ONE ROW PER MATCH")
2055        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2056            text = "ALL ROWS PER MATCH"
2057            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2058                text += f" SHOW EMPTY MATCHES"
2059            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2060                text += f" OMIT EMPTY MATCHES"
2061            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2062                text += f" WITH UNMATCHED ROWS"
2063            rows = exp.Var(this=text)
2064        else:
2065            rows = None
2066
2067        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2068            text = "AFTER MATCH SKIP"
2069            if self._match_text_seq("PAST", "LAST", "ROW"):
2070                text += f" PAST LAST ROW"
2071            elif self._match_text_seq("TO", "NEXT", "ROW"):
2072                text += f" TO NEXT ROW"
2073            elif self._match_text_seq("TO", "FIRST"):
2074                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2075            elif self._match_text_seq("TO", "LAST"):
2076                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2077            after = exp.Var(this=text)
2078        else:
2079            after = None
2080
2081        if self._match_text_seq("PATTERN"):
2082            self._match_l_paren()
2083
2084            if not self._curr:
2085                self.raise_error("Expecting )", self._curr)
2086
2087            paren = 1
2088            start = self._curr
2089
2090            while self._curr and paren > 0:
2091                if self._curr.token_type == TokenType.L_PAREN:
2092                    paren += 1
2093                if self._curr.token_type == TokenType.R_PAREN:
2094                    paren -= 1
2095                end = self._prev
2096                self._advance()
2097            if paren > 0:
2098                self.raise_error("Expecting )", self._curr)
2099            pattern = exp.Var(this=self._find_sql(start, end))
2100        else:
2101            pattern = None
2102
2103        define = (
2104            self._parse_csv(
2105                lambda: self.expression(
2106                    exp.Alias,
2107                    alias=self._parse_id_var(any_token=True),
2108                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2109                )
2110            )
2111            if self._match_text_seq("DEFINE")
2112            else None
2113        )
2114
2115        self._match_r_paren()
2116
2117        return self.expression(
2118            exp.MatchRecognize,
2119            partition_by=partition,
2120            order=order,
2121            measures=measures,
2122            rows=rows,
2123            after=after,
2124            pattern=pattern,
2125            define=define,
2126            alias=self._parse_table_alias(),
2127        )
2128
2129    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2130        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2131        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2132
2133        if outer_apply or cross_apply:
2134            this = self._parse_select(table=True)
2135            view = None
2136            outer = not cross_apply
2137        elif self._match(TokenType.LATERAL):
2138            this = self._parse_select(table=True)
2139            view = self._match(TokenType.VIEW)
2140            outer = self._match(TokenType.OUTER)
2141        else:
2142            return None
2143
2144        if not this:
2145            this = self._parse_function() or self._parse_id_var(any_token=False)
2146            while self._match(TokenType.DOT):
2147                this = exp.Dot(
2148                    this=this,
2149                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2150                )
2151
2152        table_alias: t.Optional[exp.Expression]
2153
2154        if view:
2155            table = self._parse_id_var(any_token=False)
2156            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2157            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2158        else:
2159            table_alias = self._parse_table_alias()
2160
2161        expression = self.expression(
2162            exp.Lateral,
2163            this=this,
2164            view=view,
2165            outer=outer,
2166            alias=table_alias,
2167        )
2168
2169        return expression
2170
2171    def _parse_join_side_and_kind(
2172        self,
2173    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2174        return (
2175            self._match(TokenType.NATURAL) and self._prev,
2176            self._match_set(self.JOIN_SIDES) and self._prev,
2177            self._match_set(self.JOIN_KINDS) and self._prev,
2178        )
2179
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join: comma joins, [NATURAL] [side] [kind] JOIN, and APPLY forms.

        Args:
            skip_join_token: treat the JOIN keyword as optional (already implied).

        Returns:
            An exp.Join node, or None when no join is present.
        """
        if self._match(TokenType.COMMA):
            # Implicit cross join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; rewind the speculative matches above.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # Join condition: either ON <conjunction> or USING (<id>, ...)
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2223
2224    def _parse_index(self) -> exp.Expression:
2225        index = self._parse_id_var()
2226        self._match(TokenType.ON)
2227        self._match(TokenType.TABLE)  # hive
2228
2229        return self.expression(
2230            exp.Index,
2231            this=index,
2232            table=self.expression(exp.Table, this=self._parse_id_var()),
2233            columns=self._parse_expression(),
2234        )
2235
2236    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2237        unique = self._match(TokenType.UNIQUE)
2238        primary = self._match_text_seq("PRIMARY")
2239        amp = self._match_text_seq("AMP")
2240        if not self._match(TokenType.INDEX):
2241            return None
2242        index = self._parse_id_var()
2243        columns = None
2244        if self._match(TokenType.L_PAREN, advance=False):
2245            columns = self._parse_wrapped_csv(self._parse_column)
2246        return self.expression(
2247            exp.Index,
2248            this=index,
2249            columns=columns,
2250            unique=unique,
2251            primary=primary,
2252            amp=amp,
2253        )
2254
2255    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2256        return (
2257            (not schema and self._parse_function())
2258            or self._parse_id_var(any_token=False)
2259            or self._parse_string_as_identifier()
2260            or self._parse_placeholder()
2261        )
2262
2263    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2264        catalog = None
2265        db = None
2266        table = self._parse_table_part(schema=schema)
2267
2268        while self._match(TokenType.DOT):
2269            if catalog:
2270                # This allows nesting the table in arbitrarily many dot expressions if needed
2271                table = self.expression(
2272                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2273                )
2274            else:
2275                catalog = db
2276                db = table
2277                table = self._parse_table_part(schema=schema)
2278
2279        if not table:
2280            self.raise_error(f"Expected table name but got {self._curr}")
2281
2282        return self.expression(
2283            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2284        )
2285
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral/unnest/values/subquery, or a table
        name with optional alias, pivots, hints, and a TABLESAMPLE clause.

        Args:
            schema: parse the table as a schema definition.
            alias_tokens: token types allowed as an alias (defaults to
                TABLE_ALIAS_TOKENS).
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: some dialects put the alias after the TABLESAMPLE clause,
        # so the sample is parsed either before or after the alias accordingly.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # T-SQL-style table hints: WITH (NOLOCK, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2337
2338    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2339        if not self._match(TokenType.UNNEST):
2340            return None
2341
2342        expressions = self._parse_wrapped_csv(self._parse_type)
2343        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2344        alias = self._parse_table_alias()
2345
2346        if alias and self.unnest_column_only:
2347            if alias.args.get("columns"):
2348                self.raise_error("Unexpected extra column alias in unnest.")
2349            alias.set("columns", [alias.this])
2350            alias.set("this", None)
2351
2352        offset = None
2353        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2354            self._match(TokenType.ALIAS)
2355            offset = self._parse_id_var() or exp.Identifier(this="offset")
2356
2357        return self.expression(
2358            exp.Unnest,
2359            expressions=expressions,
2360            ordinality=ordinality,
2361            alias=alias,
2362            offset=offset,
2363        )
2364
2365    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2366        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2367        if not is_derived and not self._match(TokenType.VALUES):
2368            return None
2369
2370        expressions = self._parse_csv(self._parse_value)
2371
2372        if is_derived:
2373            self._match_r_paren()
2374
2375        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2376
2377    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2378        if not self._match(TokenType.TABLE_SAMPLE) and not (
2379            as_modifier and self._match_text_seq("USING", "SAMPLE")
2380        ):
2381            return None
2382
2383        bucket_numerator = None
2384        bucket_denominator = None
2385        bucket_field = None
2386        percent = None
2387        rows = None
2388        size = None
2389        seed = None
2390
2391        kind = (
2392            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2393        )
2394        method = self._parse_var(tokens=(TokenType.ROW,))
2395
2396        self._match(TokenType.L_PAREN)
2397
2398        num = self._parse_number()
2399
2400        if self._match(TokenType.BUCKET):
2401            bucket_numerator = self._parse_number()
2402            self._match(TokenType.OUT_OF)
2403            bucket_denominator = bucket_denominator = self._parse_number()
2404            self._match(TokenType.ON)
2405            bucket_field = self._parse_field()
2406        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2407            percent = num
2408        elif self._match(TokenType.ROWS):
2409            rows = num
2410        else:
2411            size = num
2412
2413        self._match(TokenType.R_PAREN)
2414
2415        if self._match(TokenType.L_PAREN):
2416            method = self._parse_var()
2417            seed = self._match(TokenType.COMMA) and self._parse_number()
2418            self._match_r_paren()
2419        elif self._match_texts(("SEED", "REPEATABLE")):
2420            seed = self._parse_wrapped(self._parse_number)
2421
2422        return self.expression(
2423            exp.TableSample,
2424            method=method,
2425            bucket_numerator=bucket_numerator,
2426            bucket_denominator=bucket_denominator,
2427            bucket_field=bucket_field,
2428            percent=percent,
2429            rows=rows,
2430            size=size,
2431            seed=seed,
2432            kind=kind,
2433        )
2434
2435    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2436        return list(iter(self._parse_pivot, None))
2437
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT or UNPIVOT clause into an `exp.Pivot` node.

        Backtracks and returns None when no PIVOT/UNPIVOT keyword (or no
        opening parenthesis after it) is found.
        """
        index = self._index  # remember position so we can backtrack

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Keyword without a parenthesized body: not actually a pivot clause.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # An alias is only parsed when no further PIVOT/UNPIVOT follows.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names the pivot produces, combining
            # each IN-list value with each aggregation name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2497
2498    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2499        return [agg.alias for agg in aggregations]
2500
2501    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2502        if not skip_where_token and not self._match(TokenType.WHERE):
2503            return None
2504
2505        return self.expression(
2506            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2507        )
2508
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause — plain expressions plus GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS modifiers — into an `exp.Group` node.

        Returns None when no GROUP BY keyword is found (unless
        `skip_group_by_token` is True).
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop because expressions and modifiers can be interleaved,
        # e.g. GROUP BY a, ROLLUP(b), c.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # The bare WITH ROLLUP / WITH CUBE forms carry no column list,
            # so `rollup`/`cube` become True instead of a list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2545
2546    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2547        if not self._match(TokenType.GROUPING_SETS):
2548            return None
2549
2550        return self._parse_wrapped_csv(self._parse_grouping_set)
2551
2552    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2553        if self._match(TokenType.L_PAREN):
2554            grouping_set = self._parse_csv(self._parse_column)
2555            self._match_r_paren()
2556            return self.expression(exp.Tuple, expressions=grouping_set)
2557
2558        return self._parse_column()
2559
2560    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2561        if not skip_having_token and not self._match(TokenType.HAVING):
2562            return None
2563        return self.expression(exp.Having, this=self._parse_conjunction())
2564
2565    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2566        if not self._match(TokenType.QUALIFY):
2567            return None
2568        return self.expression(exp.Qualify, this=self._parse_conjunction())
2569
2570    def _parse_order(
2571        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2572    ) -> t.Optional[exp.Expression]:
2573        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2574            return this
2575
2576        return self.expression(
2577            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2578        )
2579
2580    def _parse_sort(
2581        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2582    ) -> t.Optional[exp.Expression]:
2583        if not self._match(token_type):
2584            return None
2585        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2586
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY element: expr [ASC|DESC] [NULLS FIRST|LAST].

        When nulls are not ordered explicitly, `nulls_first` is derived from
        the dialect's `null_ordering` setting so output can round-trip.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; just consume it
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # If "nulls are small", ascending order implicitly puts them first;
        # otherwise descending order does — unless the dialect forces nulls last.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2608
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP when `top=True`) clause, or an ANSI FETCH clause.

        Returns `this` unchanged when neither keyword follows.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # Allow the parenthesized form, e.g. TOP (n).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))  # noise words

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2647
2648    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2649        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2650            return this
2651
2652        count = self._parse_number()
2653        self._match_set((TokenType.ROW, TokenType.ROWS))
2654        return self.expression(exp.Offset, this=this, expression=count)
2655
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more row-locking clauses (FOR UPDATE / FOR SHARE /
        LOCK IN SHARE MODE) with optional OF, NOWAIT/WAIT/SKIP LOCKED options.
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                # Optional list of tables the lock applies to.
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()  # WAIT is followed by a value
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2687
2688    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2689        if not self._match_set(self.SET_OPERATIONS):
2690            return this
2691
2692        token_type = self._prev.token_type
2693
2694        if token_type == TokenType.UNION:
2695            expression = exp.Union
2696        elif token_type == TokenType.EXCEPT:
2697            expression = exp.Except
2698        else:
2699            expression = exp.Intersect
2700
2701        return self.expression(
2702            expression,
2703            this=this,
2704            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2705            expression=self._parse_set_operations(self._parse_select(nested=True)),
2706        )
2707
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_conjunction())
2710
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse conjunction-level operators (self.CONJUNCTION) over equalities."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2713
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (self.EQUALITY) over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2716
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators (self.COMPARISON) over ranges."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2719
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (self.RANGE_PARSERS, e.g. BETWEEN/IN/LIKE),
        ISNULL/NOTNULL shortcuts and trailing IS expressions on a bitwise expr.
        """
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)  # e.g. NOT BETWEEN / NOT IN

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser backtracked; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2746
2747    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2748        index = self._index - 1
2749        negate = self._match(TokenType.NOT)
2750        if self._match(TokenType.DISTINCT_FROM):
2751            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2752            return self.expression(klass, this=this, expression=self._parse_expression())
2753
2754        expression = self._parse_null() or self._parse_boolean()
2755        if not expression:
2756            self._retreat(index)
2757            return None
2758
2759        this = self.expression(exp.Is, this=this, expression=expression)
2760        return self.expression(exp.Not, this=this) if negate else this
2761
2762    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2763        unnest = self._parse_unnest()
2764        if unnest:
2765            this = self.expression(exp.In, this=this, unnest=unnest)
2766        elif self._match(TokenType.L_PAREN):
2767            expressions = self._parse_csv(self._parse_select_or_expression)
2768
2769            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2770                this = self.expression(exp.In, this=this, query=expressions[0])
2771            else:
2772                this = self.expression(exp.In, this=this, expressions=expressions)
2773
2774            self._match_r_paren(this)
2775        else:
2776            this = self.expression(exp.In, this=this, field=self._parse_field())
2777
2778        return this
2779
2780    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2781        low = self._parse_bitwise()
2782        self._match(TokenType.AND)
2783        high = self._parse_bitwise()
2784        return self.expression(exp.Between, this=this, low=low, high=high)
2785
2786    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2787        if not self._match(TokenType.ESCAPE):
2788            return this
2789        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2790
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing single-unit literals to
        the canonical INTERVAL '<n>' <unit> form.
        """
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                # Numeric literals become strings: INTERVAL 5 day -> INTERVAL '5' day
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2811
2812    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2813        this = self._parse_term()
2814
2815        while True:
2816            if self._match_set(self.BITWISE):
2817                this = self.expression(
2818                    self.BITWISE[self._prev.token_type],
2819                    this=this,
2820                    expression=self._parse_term(),
2821                )
2822            elif self._match_pair(TokenType.LT, TokenType.LT):
2823                this = self.expression(
2824                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2825                )
2826            elif self._match_pair(TokenType.GT, TokenType.GT):
2827                this = self.expression(
2828                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2829                )
2830            else:
2831                break
2832
2833        return this
2834
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse term-level operators (self.TERM) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2837
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse factor-level operators (self.FACTOR) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2840
2841    def _parse_unary(self) -> t.Optional[exp.Expression]:
2842        if self._match_set(self.UNARY_PARSERS):
2843            return self.UNARY_PARSERS[self._prev.token_type](self)
2844        return self._parse_at_time_zone(self._parse_type())
2845
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a data type: an interval,
        a type-prefixed literal (parsed as a cast), a bare type, or a column.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index  # for backtracking if the type guess is wrong
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal': use a dialect-specific literal parser if one
                # is registered, otherwise produce a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare, unparameterized type name followed by more tokens was
                # probably an identifier; rewind and reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2867
2868    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2869        this = self._parse_type()
2870        if not this:
2871            return None
2872
2873        return self.expression(
2874            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2875        )
2876
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type (parameterized, nested, or array-suffixed) into an
        `exp.DataType`.

        When `check_func` is True, a parenthesized type that is NOT followed by
        a string literal is rejected (backtracks and returns None), since such
        input may be a function call rather than a type.
        """
        index = self._index  # for backtracking whenever the type guess fails

        prefix = self._match_text_seq("SYSUDTLIB", ".")  # optional "SYSUDTLIB." prefix

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameter list: this wasn't a type after all.
                self._retreat(index)
                return None

            # NAME(...) is ambiguous with a function call; resolved below
            # when check_func is set.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Array suffix: TYPE[], possibly repeated for nested arrays.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' after the type name: not an array type — backtrack.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket nesting: ARRAY<...>, MAP<...>, STRUCT<...>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # TYPE(...) not followed by a string literal: reject the type parse.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2989
2990    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2991        this = self._parse_type() or self._parse_id_var()
2992        self._match(TokenType.COLON)
2993        return self._parse_column_def(this)
2994
2995    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2996        if not self._match(TokenType.AT_TIME_ZONE):
2997            return this
2998        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2999
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including trailing
        column operators such as dots, `::type` casts and bracket access.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Custom column operator: consume one token as its literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers left: what we parsed as the column so far is
                # really a table/db/catalog qualifier of the new field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3052
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with implicit string
        concatenation), a leading-dot number, or a parenthesized
        expression/subquery/tuple. Returns None when nothing matches.
        """
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers like .5 are tokenized as DOT followed by NUMBER.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3096
3097    def _parse_field(
3098        self,
3099        any_token: bool = False,
3100        tokens: t.Optional[t.Collection[TokenType]] = None,
3101    ) -> t.Optional[exp.Expression]:
3102        return (
3103            self._parse_primary()
3104            or self._parse_function()
3105            or self._parse_id_var(any_token=any_token, tokens=tokens)
3106        )
3107
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder overrides; defaults to
                `self.FUNCTIONS`.
            anonymous: when True, bypass dedicated/known-function parsers and
                build an `exp.Anonymous` node instead.

        Returns the parsed function (with any trailing window clause applied),
        or None when the current tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed like functions but without parentheses.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # Subquery predicates like EXISTS(SELECT ...) take a query argument.
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function name: keep it as-is with its raw arguments.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3159
3160    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3161        return self._parse_column_def(self._parse_id_var())
3162
3163    def _parse_user_defined_function(
3164        self, kind: t.Optional[TokenType] = None
3165    ) -> t.Optional[exp.Expression]:
3166        this = self._parse_id_var()
3167
3168        while self._match(TokenType.DOT):
3169            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3170
3171        if not self._match(TokenType.L_PAREN):
3172            return this
3173
3174        expressions = self._parse_csv(self._parse_function_parameter)
3175        self._match_r_paren()
3176        return self.expression(
3177            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3178        )
3179
3180    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3181        literal = self._parse_primary()
3182        if literal:
3183            return self.expression(exp.Introducer, this=token.text, expression=literal)
3184
3185        return self.expression(exp.Identifier, this=token.text)
3186
3187    def _parse_national(self, token: Token) -> exp.Expression:
3188        return self.expression(exp.National, this=exp.Literal.string(token.text))
3189
    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as
        `kind.name` (the part before the dot becomes the `kind`)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
3199
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. ``(x, y) -> ...``); if no lambda arrow follows,
        rewind and parse a regular function argument (DISTINCT list, SELECT or
        expression) with optional trailing modifiers.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all — rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow found: undo everything consumed above and fall back.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Reinterpret the left side of a top-level equality as a
                    # bare name (Var) rather than a column reference.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3231
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema body: a parenthesized list of constraints and/or
        column definitions attached to `this`.
        """
        index = self._index

        try:
            # Speculatively try a (nested) SELECT; if one parses, there is no
            # schema here. The finally clause always rewinds, so on success the
            # caller re-parses the SELECT from the saved position.
            if self._parse_select(nested=True):
                return this
        except Exception:
            # A failed speculative parse just means "not a SELECT" — ignore it.
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3252
3253    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3254        # column defs are not really columns, they're identifiers
3255        if isinstance(this, exp.Column):
3256            this = this.this
3257        kind = self._parse_types()
3258
3259        if self._match_text_seq("FOR", "ORDINALITY"):
3260            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3261
3262        constraints = []
3263        while True:
3264            constraint = self._parse_column_constraint()
3265            if not constraint:
3266                break
3267            constraints.append(constraint)
3268
3269        if not kind and not constraints:
3270            return this
3271
3272        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3273
3274    def _parse_auto_increment(self) -> exp.Expression:
3275        start = None
3276        increment = None
3277
3278        if self._match(TokenType.L_PAREN, advance=False):
3279            args = self._parse_wrapped_csv(self._parse_bitwise)
3280            start = seq_get(args, 0)
3281            increment = seq_get(args, 1)
3282        elif self._match_text_seq("START"):
3283            start = self._parse_bitwise()
3284            self._match_text_seq("INCREMENT")
3285            increment = self._parse_bitwise()
3286
3287        if start and increment:
3288            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3289
3290        return exp.AutoIncrementColumnConstraint()
3291
3292    def _parse_compress(self) -> exp.Expression:
3293        if self._match(TokenType.L_PAREN, advance=False):
3294            return self.expression(
3295                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3296            )
3297
3298        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3299
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY with an
        optional (START WITH ... INCREMENT BY ... MINVALUE ... MAXVALUE ...
        [NO] CYCLE) option list.
        """
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            # this=False encodes BY DEFAULT (as opposed to GENERATED ALWAYS).
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Each option is optional; they are consumed in this fixed order.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3329
3330    def _parse_inline(self) -> t.Optional[exp.Expression]:
3331        self._match_text_seq("LENGTH")
3332        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3333
3334    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3335        if self._match_text_seq("NULL"):
3336            return self.expression(exp.NotNullColumnConstraint)
3337        if self._match_text_seq("CASESPECIFIC"):
3338            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3339        return None
3340
3341    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3342        if self._match(TokenType.CONSTRAINT):
3343            this = self._parse_id_var()
3344        else:
3345            this = None
3346
3347        if self._match_texts(self.CONSTRAINT_PARSERS):
3348            return self.expression(
3349                exp.ColumnConstraint,
3350                this=this,
3351                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3352            )
3353
3354        return this
3355
3356    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3357        if not self._match(TokenType.CONSTRAINT):
3358            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3359
3360        this = self._parse_id_var()
3361        expressions = []
3362
3363        while True:
3364            constraint = self._parse_unnamed_constraint() or self._parse_function()
3365            if not constraint:
3366                break
3367            expressions.append(constraint)
3368
3369        return self.expression(exp.Constraint, this=this, expressions=expressions)
3370
3371    def _parse_unnamed_constraint(
3372        self, constraints: t.Optional[t.Collection[str]] = None
3373    ) -> t.Optional[exp.Expression]:
3374        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3375            return None
3376
3377        constraint = self._prev.text.upper()
3378        if constraint not in self.CONSTRAINT_PARSERS:
3379            self.raise_error(f"No parser found for schema constraint {constraint}.")
3380
3381        return self.CONSTRAINT_PARSERS[constraint](self)
3382
3383    def _parse_unique(self) -> exp.Expression:
3384        if not self._match(TokenType.L_PAREN, advance=False):
3385            return self.expression(exp.UniqueColumnConstraint)
3386        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3387
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings until none match.
        """
        options = []
        while True:
            if not self._curr:
                # Ran out of tokens.
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # No recognized option: stop consuming.
                break

        return options
3424
3425    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3426        if match and not self._match(TokenType.REFERENCES):
3427            return None
3428
3429        expressions = None
3430        this = self._parse_id_var()
3431
3432        if self._match(TokenType.L_PAREN, advance=False):
3433            expressions = self._parse_wrapped_id_vars()
3434
3435        options = self._parse_key_constraint_options()
3436        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3437
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause and
        any ON DELETE / ON UPDATE actions.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" to the action string (e.g. "CASCADE").
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Fallback: treat the next token's text as the action verb.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3463
3464    def _parse_primary_key(self) -> exp.Expression:
3465        desc = (
3466            self._match_set((TokenType.ASC, TokenType.DESC))
3467            and self._prev.token_type == TokenType.DESC
3468        )
3469
3470        if not self._match(TokenType.L_PAREN, advance=False):
3471            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3472
3473        expressions = self._parse_wrapped_csv(self._parse_field)
3474        options = self._parse_key_constraint_options()
3475        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3476
3477    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3478        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
3479            return this
3480
3481        bracket_kind = self._prev.token_type
3482        expressions: t.List[t.Optional[exp.Expression]]
3483
3484        if self._match(TokenType.COLON):
3485            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
3486        else:
3487            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))
3488
3489        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
3490        if bracket_kind == TokenType.L_BRACE:
3491            this = self.expression(exp.Struct, expressions=expressions)
3492        elif not this or this.name.upper() == "ARRAY":
3493            this = self.expression(exp.Array, expressions=expressions)
3494        else:
3495            expressions = apply_index_offset(this, expressions, -self.index_offset)
3496            this = self.expression(exp.Bracket, this=this, expressions=expressions)
3497
3498        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
3499            self.raise_error("Expected ]")
3500        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
3501            self.raise_error("Expected }")
3502
3503        self._add_comments(this)
3504        return self._parse_bracket(this)
3505
3506    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3507        if self._match(TokenType.COLON):
3508            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3509        return this
3510
3511    def _parse_case(self) -> t.Optional[exp.Expression]:
3512        ifs = []
3513        default = None
3514
3515        expression = self._parse_conjunction()
3516
3517        while self._match(TokenType.WHEN):
3518            this = self._parse_conjunction()
3519            self._match(TokenType.THEN)
3520            then = self._parse_conjunction()
3521            ifs.append(self.expression(exp.If, this=this, true=then))
3522
3523        if self._match(TokenType.ELSE):
3524            default = self._parse_conjunction()
3525
3526        if not self._match(TokenType.END):
3527            self.raise_error("Expected END after CASE", self._prev)
3528
3529        return self._parse_window(
3530            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3531        )
3532
3533    def _parse_if(self) -> t.Optional[exp.Expression]:
3534        if self._match(TokenType.L_PAREN):
3535            args = self._parse_csv(self._parse_conjunction)
3536            this = exp.If.from_arg_list(args)
3537            self.validate_expression(this, args)
3538            self._match_r_paren()
3539        else:
3540            index = self._index - 1
3541            condition = self._parse_conjunction()
3542
3543            if not condition:
3544                self._retreat(index)
3545                return None
3546
3547            self._match(TokenType.THEN)
3548            true = self._parse_conjunction()
3549            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3550            self._match(TokenType.END)
3551            this = self.expression(exp.If, this=condition, true=true, false=false)
3552
3553        return self._parse_window(this)
3554
3555    def _parse_extract(self) -> exp.Expression:
3556        this = self._parse_function() or self._parse_var() or self._parse_type()
3557
3558        if self._match(TokenType.FROM):
3559            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3560
3561        if not self._match(TokenType.COMMA):
3562            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3563
3564        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3565
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST arguments: <expr> AS <type>, or the comma form
        CAST(<expr>, '<type string>').

        Args:
            strict: produce exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma form: the target type is given as a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit character set, e.g. CHAR CHARACTER SET utf8.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3586
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style arguments into a GroupConcat node, handling
        DISTINCT, a trailing ORDER BY, and the WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            # No WITHIN GROUP: rewind past the R_PAREN we consumed above.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3615
3616    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3617        to: t.Optional[exp.Expression]
3618        this = self._parse_bitwise()
3619
3620        if self._match(TokenType.USING):
3621            to = self.expression(exp.CharacterSet, this=self._parse_var())
3622        elif self._match(TokenType.COMMA):
3623            to = self._parse_bitwise()
3624        else:
3625            to = None
3626
3627        # Swap the argument order if needed to produce the correct AST
3628        if self.CONVERT_TYPE_FIRST:
3629            this, to = to, this
3630
3631        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3632
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # A NULL search value must be matched with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality, or when both sides are NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3679
3680    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3681        self._match_text_seq("KEY")
3682        key = self._parse_field()
3683        self._match(TokenType.COLON)
3684        self._match_text_seq("VALUE")
3685        value = self._parse_field()
3686        if not key and not value:
3687            return None
3688        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3689
3690    def _parse_json_object(self) -> exp.Expression:
3691        expressions = self._parse_csv(self._parse_json_key_value)
3692
3693        null_handling = None
3694        if self._match_text_seq("NULL", "ON", "NULL"):
3695            null_handling = "NULL ON NULL"
3696        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3697            null_handling = "ABSENT ON NULL"
3698
3699        unique_keys = None
3700        if self._match_text_seq("WITH", "UNIQUE"):
3701            unique_keys = True
3702        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3703            unique_keys = False
3704
3705        self._match_text_seq("KEYS")
3706
3707        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3708        format_json = self._match_text_seq("FORMAT", "JSON")
3709        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3710
3711        return self.expression(
3712            exp.JSONObject,
3713            expressions=expressions,
3714            null_handling=null_handling,
3715            unique_keys=unique_keys,
3716            return_type=return_type,
3717            format_json=format_json,
3718            encoding=encoding,
3719        )
3720
3721    def _parse_logarithm(self) -> exp.Expression:
3722        # Default argument order is base, expression
3723        args = self._parse_csv(self._parse_range)
3724
3725        if len(args) > 1:
3726            if not self.LOG_BASE_FIRST:
3727                args.reverse()
3728            return exp.Log.from_arg_list(args)
3729
3730        return self.expression(
3731            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3732        )
3733
    def _parse_match_against(self) -> exp.Expression:
        """Parse MATCH (<cols>) AGAINST (<expr> [modifier]) full-text search
        arguments, capturing the optional search-mode modifier as a string.
        """
        expressions = self._parse_csv(self._parse_column)

        # Consume the close of the column list and the open of AGAINST(...).
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            # WITH QUERY EXPANSION may additionally follow natural language mode.
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3755
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(<expr> [, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: <name> <type> [<path>] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The WITH clause sits outside OPENJSON's parens: `) WITH (`.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3776
3777    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3778        args = self._parse_csv(self._parse_bitwise)
3779
3780        if self._match(TokenType.IN):
3781            return self.expression(
3782                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3783            )
3784
3785        if haystack_first:
3786            haystack = seq_get(args, 0)
3787            needle = seq_get(args, 1)
3788        else:
3789            needle = seq_get(args, 0)
3790            haystack = seq_get(args, 1)
3791
3792        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3793
3794        self.validate_expression(this, args)
3795
3796        return this
3797
3798    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3799        args = self._parse_csv(self._parse_table)
3800        return exp.JoinHint(this=func_name.upper(), expressions=args)
3801
3802    def _parse_substring(self) -> exp.Expression:
3803        # Postgres supports the form: substring(string [from int] [for int])
3804        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3805
3806        args = self._parse_csv(self._parse_bitwise)
3807
3808        if self._match(TokenType.FROM):
3809            args.append(self._parse_bitwise())
3810            if self._match(TokenType.FOR):
3811                args.append(self._parse_bitwise())
3812
3813        this = exp.Substring.from_arg_list(args)
3814        self.validate_expression(this, args)
3815
3816        return this
3817
3818    def _parse_trim(self) -> exp.Expression:
3819        # https://www.w3resource.com/sql/character-functions/trim.php
3820        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3821
3822        position = None
3823        collation = None
3824
3825        if self._match_set(self.TRIM_TYPES):
3826            position = self._prev.text.upper()
3827
3828        expression = self._parse_bitwise()
3829        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3830            this = self._parse_bitwise()
3831        else:
3832            this = expression
3833            expression = None
3834
3835        if self._match(TokenType.COLLATE):
3836            collation = self._parse_bitwise()
3837
3838        return self.expression(
3839            exp.Trim,
3840            this=this,
3841            position=position,
3842            expression=expression,
3843            collation=collation,
3844        )
3845
3846    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3847        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3848
3849    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3850        return self._parse_window(self._parse_id_var(), alias=True)
3851
3852    def _parse_respect_or_ignore_nulls(
3853        self, this: t.Optional[exp.Expression]
3854    ) -> t.Optional[exp.Expression]:
3855        if self._match(TokenType.IGNORE_NULLS):
3856            return self.expression(exp.IgnoreNulls, this=this)
3857        if self._match(TokenType.RESPECT_NULLS):
3858            return self.expression(exp.RespectNulls, this=this)
3859        return this
3860
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the clauses that may follow a function call — FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and OVER (...) — wrapping `this` as needed.

        Args:
            this: the expression the window applies to.
            alias: parse a named window definition (name AS (...)) instead of OVER.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window, not an inline spec.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3943
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary of a window spec, returning its value
        (keyword text or expression) and side (PRECEDING/FOLLOWING).
        """
        self._match(TokenType.BETWEEN)

        return {
            # UNBOUNDED / CURRENT ROW keep the raw keyword text; otherwise the
            # boundary is an arbitrary expression (e.g. a row count).
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }
3954
3955    def _parse_alias(
3956        self, this: t.Optional[exp.Expression], explicit: bool = False
3957    ) -> t.Optional[exp.Expression]:
3958        any_token = self._match(TokenType.ALIAS)
3959
3960        if explicit and not any_token:
3961            return this
3962
3963        if self._match(TokenType.L_PAREN):
3964            aliases = self.expression(
3965                exp.Aliases,
3966                this=this,
3967                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3968            )
3969            self._match_r_paren(aliases)
3970            return aliases
3971
3972        alias = self._parse_id_var(any_token)
3973
3974        if alias:
3975            return self.expression(exp.Alias, this=this, alias=alias)
3976
3977        return this
3978
3979    def _parse_id_var(
3980        self,
3981        any_token: bool = True,
3982        tokens: t.Optional[t.Collection[TokenType]] = None,
3983        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3984    ) -> t.Optional[exp.Expression]:
3985        identifier = self._parse_identifier()
3986
3987        if identifier:
3988            return identifier
3989
3990        prefix = ""
3991
3992        if prefix_tokens:
3993            while self._match_set(prefix_tokens):
3994                prefix += self._prev.text
3995
3996        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3997            quoted = self._prev.token_type == TokenType.STRING
3998            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3999
4000        return None
4001
4002    def _parse_string(self) -> t.Optional[exp.Expression]:
4003        if self._match(TokenType.STRING):
4004            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4005        return self._parse_placeholder()
4006
4007    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4008        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4009
4010    def _parse_number(self) -> t.Optional[exp.Expression]:
4011        if self._match(TokenType.NUMBER):
4012            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4013        return self._parse_placeholder()
4014
4015    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4016        if self._match(TokenType.IDENTIFIER):
4017            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4018        return self._parse_placeholder()
4019
4020    def _parse_var(
4021        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4022    ) -> t.Optional[exp.Expression]:
4023        if (
4024            (any_token and self._advance_any())
4025            or self._match(TokenType.VAR)
4026            or (self._match_set(tokens) if tokens else False)
4027        ):
4028            return self.expression(exp.Var, this=self._prev.text)
4029        return self._parse_placeholder()
4030
4031    def _advance_any(self) -> t.Optional[Token]:
4032        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4033            self._advance()
4034            return self._prev
4035        return None
4036
4037    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4038        return self._parse_var() or self._parse_string()
4039
4040    def _parse_null(self) -> t.Optional[exp.Expression]:
4041        if self._match(TokenType.NULL):
4042            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4043        return None
4044
4045    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4046        if self._match(TokenType.TRUE):
4047            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4048        if self._match(TokenType.FALSE):
4049            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4050        return None
4051
4052    def _parse_star(self) -> t.Optional[exp.Expression]:
4053        if self._match(TokenType.STAR):
4054            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4055        return None
4056
4057    def _parse_parameter(self) -> exp.Expression:
4058        wrapped = self._match(TokenType.L_BRACE)
4059        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4060        self._match(TokenType.R_BRACE)
4061        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4062
4063    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4064        if self._match_set(self.PLACEHOLDER_PARSERS):
4065            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4066            if placeholder:
4067                return placeholder
4068            self._advance(-1)
4069        return None
4070
4071    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4072        if not self._match(TokenType.EXCEPT):
4073            return None
4074        if self._match(TokenType.L_PAREN, advance=False):
4075            return self._parse_wrapped_csv(self._parse_column)
4076        return self._parse_csv(self._parse_column)
4077
4078    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4079        if not self._match(TokenType.REPLACE):
4080            return None
4081        if self._match(TokenType.L_PAREN, advance=False):
4082            return self._parse_wrapped_csv(self._parse_expression)
4083        return self._parse_csv(self._parse_expression)
4084
4085    def _parse_csv(
4086        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4087    ) -> t.List[t.Optional[exp.Expression]]:
4088        parse_result = parse_method()
4089        items = [parse_result] if parse_result is not None else []
4090
4091        while self._match(sep):
4092            self._add_comments(parse_result)
4093            parse_result = parse_method()
4094            if parse_result is not None:
4095                items.append(parse_result)
4096
4097        return items
4098
4099    def _parse_tokens(
4100        self, parse_method: t.Callable, expressions: t.Dict
4101    ) -> t.Optional[exp.Expression]:
4102        this = parse_method()
4103
4104        while self._match_set(expressions):
4105            this = self.expression(
4106                expressions[self._prev.token_type],
4107                this=this,
4108                comments=self._prev_comments,
4109                expression=parse_method(),
4110            )
4111
4112        return this
4113
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a comma-separated identifier list; parentheses are required unless `optional`."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4116
4117    def _parse_wrapped_csv(
4118        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4119    ) -> t.List[t.Optional[exp.Expression]]:
4120        return self._parse_wrapped(
4121            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4122        )
4123
4124    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4125        wrapped = self._match(TokenType.L_PAREN)
4126        if not wrapped and not optional:
4127            self.raise_error("Expecting (")
4128        parse_result = parse_method()
4129        if wrapped:
4130            self._match_r_paren()
4131        return parse_result
4132
4133    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4134        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4135
4136    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4137        return self._parse_set_operations(
4138            self._parse_select(nested=True, parse_subquery_alias=False)
4139        )
4140
4141    def _parse_transaction(self) -> exp.Expression:
4142        this = None
4143        if self._match_texts(self.TRANSACTION_KIND):
4144            this = self._prev.text
4145
4146        self._match_texts({"TRANSACTION", "WORK"})
4147
4148        modes = []
4149        while True:
4150            mode = []
4151            while self._match(TokenType.VAR):
4152                mode.append(self._prev.text)
4153
4154            if mode:
4155                modes.append(" ".join(mode))
4156            if not self._match(TokenType.COMMA):
4157                break
4158
4159        return self.expression(exp.Transaction, this=this, modes=modes)
4160
4161    def _parse_commit_or_rollback(self) -> exp.Expression:
4162        chain = None
4163        savepoint = None
4164        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4165
4166        self._match_texts({"TRANSACTION", "WORK"})
4167
4168        if self._match_text_seq("TO"):
4169            self._match_text_seq("SAVEPOINT")
4170            savepoint = self._parse_id_var()
4171
4172        if self._match(TokenType.AND):
4173            chain = not self._match_text_seq("NO")
4174            self._match_text_seq("CHAIN")
4175
4176        if is_rollback:
4177            return self.expression(exp.Rollback, savepoint=savepoint)
4178        return self.expression(exp.Commit, chain=chain)
4179
4180    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4181        if not self._match_text_seq("ADD"):
4182            return None
4183
4184        self._match(TokenType.COLUMN)
4185        exists_column = self._parse_exists(not_=True)
4186        expression = self._parse_column_def(self._parse_field(any_token=True))
4187
4188        if expression:
4189            expression.set("exists", exists_column)
4190
4191            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4192            if self._match_texts(("FIRST", "AFTER")):
4193                position = self._prev.text
4194                column_position = self.expression(
4195                    exp.ColumnPosition, this=self._parse_column(), position=position
4196                )
4197                expression.set("position", column_position)
4198
4199        return expression
4200
4201    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4202        drop = self._match(TokenType.DROP) and self._parse_drop()
4203        if drop and not isinstance(drop, exp.Command):
4204            drop.set("kind", drop.args.get("kind", "COLUMN"))
4205        return drop
4206
4207    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4208    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4209        return self.expression(
4210            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4211        )
4212
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY action.

        `self._prev` holds the token that selected this parser (CONSTRAINT,
        FOREIGN KEY or PRIMARY KEY), already consumed by the caller.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (<cond>) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # A named constraint may also be followed by FOREIGN/PRIMARY KEY.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4236
4237    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4238        index = self._index - 1
4239
4240        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4241            return self._parse_csv(self._parse_add_constraint)
4242
4243        self._retreat(index)
4244        return self._parse_csv(self._parse_add_column)
4245
4246    def _parse_alter_table_alter(self) -> exp.Expression:
4247        self._match(TokenType.COLUMN)
4248        column = self._parse_field(any_token=True)
4249
4250        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4251            return self.expression(exp.AlterColumn, this=column, drop=True)
4252        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4253            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4254
4255        self._match_text_seq("SET", "DATA")
4256        return self.expression(
4257            exp.AlterColumn,
4258            this=column,
4259            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4260            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4261            using=self._match(TokenType.USING) and self._parse_conjunction(),
4262        )
4263
4264    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4265        index = self._index - 1
4266
4267        partition_exists = self._parse_exists()
4268        if self._match(TokenType.PARTITION, advance=False):
4269            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4270
4271        self._retreat(index)
4272        return self._parse_csv(self._parse_drop_column)
4273
4274    def _parse_alter_table_rename(self) -> exp.Expression:
4275        self._match_text_seq("TO")
4276        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4277
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement, falling back to a raw Command.

        If the action keyword has no registered parser, or the registered
        parser leaves unconsumed tokens, the whole statement is preserved
        verbatim as an exp.Command instead.
        """
        start = self._prev  # the token that introduced this statement

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        # The next token names the action; its upper-cased text keys ALTER_PARSERS.
        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable if the action parser consumed everything.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4302
    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # BY TARGET maps to source=False, BY SOURCE to source=True,
            # and no BY-clause to source=False as well.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE is kept as a bare keyword node.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4368
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers.

        If no parser matches, consume one token and record it verbatim
        as the SHOW target.
        """
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())
4375
4376    def _parse_set_item_assignment(
4377        self, kind: t.Optional[str] = None
4378    ) -> t.Optional[exp.Expression]:
4379        index = self._index
4380
4381        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4382            return self._parse_set_transaction(global_=kind == "GLOBAL")
4383
4384        left = self._parse_primary() or self._parse_id_var()
4385
4386        if not self._match_texts(("=", "TO")):
4387            self._retreat(index)
4388            return None
4389
4390        right = self._parse_statement() or self._parse_id_var()
4391        this = self.expression(
4392            exp.EQ,
4393            this=left,
4394            expression=right,
4395        )
4396
4397        return self.expression(
4398            exp.SetItem,
4399            this=this,
4400            kind=kind,
4401        )
4402
4403    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4404        self._match_text_seq("TRANSACTION")
4405        characteristics = self._parse_csv(
4406            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4407        )
4408        return self.expression(
4409            exp.SetItem,
4410            expressions=characteristics,
4411            kind="TRANSACTION",
4412            **{"global": global_},  # type: ignore
4413        )
4414
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one item of a SET statement, defaulting to a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4418
4419    def _parse_set(self) -> exp.Expression:
4420        index = self._index
4421        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4422
4423        if self._curr:
4424            self._retreat(index)
4425            return self._parse_as_command(self._prev)
4426
4427        return set_
4428
4429    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4430        for option in options:
4431            if self._match_text_seq(*option.split(" ")):
4432                return exp.Var(this=option)
4433        return None
4434
4435    def _parse_as_command(self, start: Token) -> exp.Command:
4436        while self._curr:
4437            self._advance()
4438        text = self._find_sql(start, self._prev)
4439        size = len(start.text)
4440        return exp.Command(this=text[:size], expression=text[size:])
4441
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Look up a parser keyed by the keyword sequence at the cursor.

        Walks the token stream through `trie`; when a complete key is
        matched, returns the corresponding entry of `parsers`. Otherwise
        the cursor is rewound and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No entry in the trie starts with this sequence: give up.
                break
            if result == 2:
                # Complete key matched: the words collected so far name the parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4464
4465    def _match(self, token_type, advance=True, expression=None):
4466        if not self._curr:
4467            return None
4468
4469        if self._curr.token_type == token_type:
4470            if advance:
4471                self._advance()
4472            self._add_comments(expression)
4473            return True
4474
4475        return None
4476
4477    def _match_set(self, types, advance=True):
4478        if not self._curr:
4479            return None
4480
4481        if self._curr.token_type in types:
4482            if advance:
4483                self._advance()
4484            return True
4485
4486        return None
4487
4488    def _match_pair(self, token_type_a, token_type_b, advance=True):
4489        if not self._curr or not self._next:
4490            return None
4491
4492        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4493            if advance:
4494                self._advance(2)
4495            return True
4496
4497        return None
4498
    def _match_l_paren(self, expression=None):
        """Consume a required opening parenthesis or record a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
4502
    def _match_r_paren(self, expression=None):
        """Consume a required closing parenthesis or record a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
4506
4507    def _match_texts(self, texts, advance=True):
4508        if self._curr and self._curr.text.upper() in texts:
4509            if advance:
4510                self._advance()
4511            return True
4512        return False
4513
4514    def _match_text_seq(self, *texts, advance=True):
4515        index = self._index
4516        for text in texts:
4517            if self._curr and self._curr.text.upper() == text:
4518                self._advance()
4519            else:
4520                self._retreat(index)
4521                return False
4522
4523        if not advance:
4524            self._retreat(index)
4525
4526        return True
4527
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var nodes.

        A qualified column becomes table.Dot(column); an unqualified column
        or bare identifier becomes a plain Var carrying its name.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then this node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4542
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite columns that reference lambda parameters inside `node`.

        Each Column whose first part names one of `lambda_variables` is
        replaced by a Dot (when qualified) or its bare inner identifier,
        so lambda arguments are not treated as table columns. Returns the
        (possibly replaced) root node.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # If the column sits inside a Dot chain, replace the
                # outermost Dot rather than the leaf column.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        # Replacing the root itself: return the new node.
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.RAISE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
809    def __init__(
810        self,
811        error_level: t.Optional[ErrorLevel] = None,
812        error_message_context: int = 100,
813        index_offset: int = 0,
814        unnest_column_only: bool = False,
815        alias_post_tablesample: bool = False,
816        max_errors: int = 3,
817        null_ordering: t.Optional[str] = None,
818    ):
819        self.error_level = error_level or ErrorLevel.IMMEDIATE
820        self.error_message_context = error_message_context
821        self.index_offset = index_offset
822        self.unnest_column_only = unnest_column_only
823        self.alias_post_tablesample = alias_post_tablesample
824        self.max_errors = max_errors
825        self.null_ordering = null_ordering
826        self.reset()
def reset(self):
828    def reset(self):
829        self.sql = ""
830        self.errors = []
831        self._tokens = []
832        self._index = 0
833        self._curr = None
834        self._next = None
835        self._prev = None
836        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
838    def parse(
839        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
840    ) -> t.List[t.Optional[exp.Expression]]:
841        """
842        Parses a list of tokens and returns a list of syntax trees, one tree
843        per parsed SQL statement.
844
845        Args:
846            raw_tokens: the list of tokens.
847            sql: the original SQL string, used to produce helpful debug messages.
848
849        Returns:
850            The list of syntax trees.
851        """
852        return self._parse(
853            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
854        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
856    def parse_into(
857        self,
858        expression_types: exp.IntoType,
859        raw_tokens: t.List[Token],
860        sql: t.Optional[str] = None,
861    ) -> t.List[t.Optional[exp.Expression]]:
862        """
863        Parses a list of tokens into a given Expression type. If a collection of Expression
864        types is given instead, this method will try to parse the token list into each one
865        of them, stopping at the first for which the parsing succeeds.
866
867        Args:
868            expression_types: the expression type(s) to try and parse the token list into.
869            raw_tokens: the list of tokens.
870            sql: the original SQL string, used to produce helpful debug messages.
871
872        Returns:
873            The target Expression.
874        """
875        errors = []
876        for expression_type in ensure_collection(expression_types):
877            parser = self.EXPRESSION_PARSERS.get(expression_type)
878            if not parser:
879                raise TypeError(f"No parser registered for {expression_type}")
880            try:
881                return self._parse(parser, raw_tokens, sql)
882            except ParseError as e:
883                e.errors[0]["into_expression"] = expression_type
884                errors.append(e)
885        raise ParseError(
886            f"Failed to parse into {expression_types}",
887            errors=merge_errors(errors),
888        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
924    def check_errors(self) -> None:
925        """
926        Logs or raises any found errors, depending on the chosen error level setting.
927        """
928        if self.error_level == ErrorLevel.WARN:
929            for error in self.errors:
930                logger.error(str(error))
931        elif self.error_level == ErrorLevel.RAISE and self.errors:
932            raise ParseError(
933                concat_messages(self.errors, self.max_errors),
934                errors=merge_errors(self.errors),
935            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
937    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
938        """
939        Appends an error in the list of recorded errors or raises it, depending on the chosen
940        error level setting.
941        """
942        token = token or self._curr or self._prev or Token.string("")
943        start = token.start
944        end = token.end
945        start_context = self.sql[max(start - self.error_message_context, 0) : start]
946        highlight = self.sql[start:end]
947        end_context = self.sql[end : end + self.error_message_context]
948
949        error = ParseError.new(
950            f"{message}. Line {token.line}, Col: {token.col}.\n"
951            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
952            description=message,
953            line=token.line,
954            col=token.col,
955            start_context=start_context,
956            highlight=highlight,
957            end_context=end_context,
958        )
959
960        if self.error_level == ErrorLevel.IMMEDIATE:
961            raise error
962
963        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: the expression class to instantiate.
        comments: an optional list of comments to attach to the expression.
        kwargs: the arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    # Attach the explicitly supplied comments, otherwise let the parser attach
    # whatever comments it has buffered. Both branches are side effects, so an
    # explicit if/else statement is used rather than a discarded conditional
    # expression.
    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    self.validate_expression(instance)
    return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.