Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
  13logger = logging.getLogger("sqlglot")
  14
  15E = t.TypeVar("E", bound=exp.Expression)
  16
  17
  18def parse_var_map(args: t.Sequence) -> exp.Expression:
  19    if len(args) == 1 and args[0].is_star:
  20        return exp.StarMap(this=args[0])
  21
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33def parse_like(args):
  34    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  35    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  36
  37
  38def binary_range_parser(
  39    expr_type: t.Type[exp.Expression],
  40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  41    return lambda self, this: self._parse_escape(
  42        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  43    )
  44
  45
  46class _Parser(type):
  47    def __new__(cls, clsname, bases, attrs):
  48        klass = super().__new__(cls, clsname, bases, attrs)
  49        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  50        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  51
  52        return klass
  53
  54
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.IMMEDIATE (applied in __init__ when None is passed).
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        unnest_column_only: presumably controls whether UNNEST aliases bind to the
            column rather than the table — TODO confirm against the dialects that set it.
            Default: False
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """
  77
    # Maps SQL function names to builder callables. Seeded from every known
    # expression's sql_names(), then overridden with special-case builders
    # (aliases, argument reordering, composite rewrites).
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        # DATE_TO_DATE_STR(x) -> CAST(x AS TEXT)
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # GLOB's operands arrive reversed relative to the Glob node.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        # TIME_TO_TIME_STR(x) -> CAST(x AS TEXT)
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS TEXT), 1, 10)
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may be written without parentheses, e.g. CURRENT_DATE.
    # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (not a datetime
    # node) — confirm this is intentional.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Join hints recognized by the dialect; none in the base parser.
    JOIN_HINTS: t.Set[str] = set()

    # Type tokens that can parameterize other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    # Every token that can begin a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that introduce a subquery, e.g. EXISTS (SELECT ...).
    # SOME is a synonym for ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that can never be parsed as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 189
    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Every object kind accepted by CREATE/DROP statements.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Keyword tokens that may still be used as identifiers in this dialect.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # Identifier tokens valid inside INTERVAL expressions; END is excluded
    # because it would be ambiguous with block terminators.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens usable as table aliases; excludes tokens that would be
    # ambiguous directly after a table reference (join sides, OFFSET, ...).
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Alias tokens allowed where a trailing IS would be ambiguous (COMMENT ... IS).
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # Alias tokens allowed in UPDATE, where SET starts the assignment list.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Positional specifiers accepted by TRIM([LEADING|TRAILING|BOTH] ... FROM ...).
    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (function call).
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
 324
    # The tables below map operator tokens to expression constructors, one
    # table per precedence level of the expression grammar.

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # Bitwise level also hosts || (DPIPE), which shares its precedence here.
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # Additive level; COLLATE binds at the same precedence.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Multiplicative level.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Tokens that denote time-like types when parsing casts/literals.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    # Lambda syntaxes: `args -> body` builds a Lambda; `name => value` a Kwarg.
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that chain onto a column: :: cast, JSON extraction
    # arrows, and ? containment. DOT is handled inline (None sentinel).
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 440
    # Dispatch table used by parse_into: target expression type -> parse method.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Statement-leading token -> parse method for top-level statements.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Prefix unary operators.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/primary expressions keyed by their leading token. Each parser
    # receives the consumed token (or `_` when its text is irrelevant).
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Bind-parameter syntaxes: ?, @param, and :name / :1 style placeholders.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range-level predicates (BETWEEN, IN, IS, LIKE-family, ...).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 547
    # Keyword -> parser for table/view properties in DDL (keys are raw strings
    # since many are multi-word phrases matched via text, not single tokens).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        # DETERMINISTIC and IMMUTABLE normalize to the same stability property.
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> parser for column constraints in CREATE TABLE / ALTER TABLE.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
 655
    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Table-level constraints that may appear without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without parentheses (CASE, IF, ...).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need bespoke parsing (keyword arguments,
    # special separators, etc.) rather than the generic CSV argument parser.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "STRUCT": lambda self: self._parse_struct(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # SELECT modifier name -> parser; joins/laterals repeat until exhausted
    # via iter(..., None).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scopes; keys feed the _set_trie built by the metaclass.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty in the base parser, filled in by dialects.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Dialect hooks for typed literals (e.g. DATE '2020-01-01').
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept trailing query modifiers (LIMIT, ORDER, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # BEGIN <kind> TRANSACTION variants.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # SET TRANSACTION characteristic phrases.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # INSERT OR <alternative> conflict-resolution keywords.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # CLONE ... AT/BEFORE (<kind> => ...) variants.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # ROWS would be ambiguous inside a window spec, so it cannot alias one.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags, overridden by subclasses:
    STRICT_CAST = True  # CAST vs TRY_CAST semantics for ::

    CONVERT_TYPE_FIRST = False  # whether CONVERT takes the type as its first argument

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True  # LOG(base, value) vs LOG(value, base)
    LOG_DEFAULTS_TO_LN = False  # single-arg LOG means LN in some dialects

    # Per-instance attributes; declared to avoid a __dict__ per parser.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 773
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Args:
            error_level: how errors are surfaced (defaults to ErrorLevel.IMMEDIATE, i.e. raise).
            error_message_context: number of characters of SQL shown on each side of an error.
            index_offset: dialect-specific array index base — TODO confirm against callers.
            unnest_column_only: dialect flag affecting UNNEST parsing — presumably set by dialects.
            alias_post_tablesample: dialect flag for alias placement relative to TABLESAMPLE.
            max_errors: maximum number of error messages concatenated into a raised ParseError.
            null_ordering: dialect's default NULL ordering — presumably e.g. "nulls_are_small".
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
 792
 793    def reset(self):
 794        self.sql = ""
 795        self.errors = []
 796        self._tokens = []
 797        self._index = 0
 798        self._curr = None
 799        self._next = None
 800        self._prev = None
 801        self._prev_comments = None
 802
 803    def parse(
 804        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 805    ) -> t.List[t.Optional[exp.Expression]]:
 806        """
 807        Parses a list of tokens and returns a list of syntax trees, one tree
 808        per parsed SQL statement.
 809
 810        Args:
 811            raw_tokens: the list of tokens.
 812            sql: the original SQL string, used to produce helpful debug messages.
 813
 814        Returns:
 815            The list of syntax trees.
 816        """
 817        return self._parse(
 818            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 819        )
 820
 821    def parse_into(
 822        self,
 823        expression_types: exp.IntoType,
 824        raw_tokens: t.List[Token],
 825        sql: t.Optional[str] = None,
 826    ) -> t.List[t.Optional[exp.Expression]]:
 827        """
 828        Parses a list of tokens into a given Expression type. If a collection of Expression
 829        types is given instead, this method will try to parse the token list into each one
 830        of them, stopping at the first for which the parsing succeeds.
 831
 832        Args:
 833            expression_types: the expression type(s) to try and parse the token list into.
 834            raw_tokens: the list of tokens.
 835            sql: the original SQL string, used to produce helpful debug messages.
 836
 837        Returns:
 838            The target Expression.
 839        """
 840        errors = []
 841        for expression_type in ensure_collection(expression_types):
 842            parser = self.EXPRESSION_PARSERS.get(expression_type)
 843            if not parser:
 844                raise TypeError(f"No parser registered for {expression_type}")
 845            try:
 846                return self._parse(parser, raw_tokens, sql)
 847            except ParseError as e:
 848                e.errors[0]["into_expression"] = expression_type
 849                errors.append(e)
 850        raise ParseError(
 851            f"Failed to parse into {expression_types}",
 852            errors=merge_errors(errors),
 853        ) from errors[-1]
 854
 855    def _parse(
 856        self,
 857        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 858        raw_tokens: t.List[Token],
 859        sql: t.Optional[str] = None,
 860    ) -> t.List[t.Optional[exp.Expression]]:
 861        self.reset()
 862        self.sql = sql or ""
 863        total = len(raw_tokens)
 864        chunks: t.List[t.List[Token]] = [[]]
 865
 866        for i, token in enumerate(raw_tokens):
 867            if token.token_type == TokenType.SEMICOLON:
 868                if i < total - 1:
 869                    chunks.append([])
 870            else:
 871                chunks[-1].append(token)
 872
 873        expressions = []
 874
 875        for tokens in chunks:
 876            self._index = -1
 877            self._tokens = tokens
 878            self._advance()
 879
 880            expressions.append(parse_method(self))
 881
 882            if self._index < len(self._tokens):
 883                self.raise_error("Invalid expression / Unexpected token")
 884
 885            self.check_errors()
 886
 887        return expressions
 888
 889    def check_errors(self) -> None:
 890        """
 891        Logs or raises any found errors, depending on the chosen error level setting.
 892        """
 893        if self.error_level == ErrorLevel.WARN:
 894            for error in self.errors:
 895                logger.error(str(error))
 896        elif self.error_level == ErrorLevel.RAISE and self.errors:
 897            raise ParseError(
 898                concat_messages(self.errors, self.max_errors),
 899                errors=merge_errors(self.errors),
 900            )
 901
 902    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 903        """
 904        Appends an error in the list of recorded errors or raises it, depending on the chosen
 905        error level setting.
 906        """
 907        token = token or self._curr or self._prev or Token.string("")
 908        start = token.start
 909        end = token.end + 1
 910        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 911        highlight = self.sql[start:end]
 912        end_context = self.sql[end : end + self.error_message_context]
 913
 914        error = ParseError.new(
 915            f"{message}. Line {token.line}, Col: {token.col}.\n"
 916            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 917            description=message,
 918            line=token.line,
 919            col=token.col,
 920            start_context=start_context,
 921            highlight=highlight,
 922            end_context=end_context,
 923        )
 924
 925        if self.error_level == ErrorLevel.IMMEDIATE:
 926            raise error
 927
 928        self.errors.append(error)
 929
 930    def expression(
 931        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 932    ) -> E:
 933        """
 934        Creates a new, validated Expression.
 935
 936        Args:
 937            exp_class: the expression class to instantiate.
 938            comments: an optional list of comments to attach to the expression.
 939            kwargs: the arguments to set for the expression along with their respective values.
 940
 941        Returns:
 942            The target expression.
 943        """
 944        instance = exp_class(**kwargs)
 945        instance.add_comments(comments) if comments else self._add_comments(instance)
 946        self.validate_expression(instance)
 947        return instance
 948
 949    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 950        if expression and self._prev_comments:
 951            expression.add_comments(self._prev_comments)
 952            self._prev_comments = None
 953
 954    def validate_expression(
 955        self, expression: exp.Expression, args: t.Optional[t.List] = None
 956    ) -> None:
 957        """
 958        Validates an already instantiated expression, making sure that all its mandatory arguments
 959        are set.
 960
 961        Args:
 962            expression: the expression to validate.
 963            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 964        """
 965        if self.error_level == ErrorLevel.IGNORE:
 966            return
 967
 968        for error_message in expression.error_messages(args):
 969            self.raise_error(error_message)
 970
 971    def _find_sql(self, start: Token, end: Token) -> str:
 972        return self.sql[start.start : end.end + 1]
 973
 974    def _advance(self, times: int = 1) -> None:
 975        self._index += times
 976        self._curr = seq_get(self._tokens, self._index)
 977        self._next = seq_get(self._tokens, self._index + 1)
 978        if self._index > 0:
 979            self._prev = self._tokens[self._index - 1]
 980            self._prev_comments = self._prev.comments
 981        else:
 982            self._prev = None
 983            self._prev_comments = None
 984
 985    def _retreat(self, index: int) -> None:
 986        if index != self._index:
 987            self._advance(index - self._index)
 988
 989    def _parse_command(self) -> exp.Command:
 990        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 991
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse ``COMMENT [IF EXISTS] ON <kind> <target> IS <string>``.

        Falls back to a generic Command when the object kind after ON is not a known
        creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        # The target is parsed according to the object kind that followed ON.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1017
1018    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1019    def _parse_ttl(self) -> exp.Expression:
1020        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1021            this = self._parse_bitwise()
1022
1023            if self._match_text_seq("DELETE"):
1024                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1025            if self._match_text_seq("RECOMPRESS"):
1026                return self.expression(
1027                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1028                )
1029            if self._match_text_seq("TO", "DISK"):
1030                return self.expression(
1031                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1032                )
1033            if self._match_text_seq("TO", "VOLUME"):
1034                return self.expression(
1035                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1036                )
1037
1038            return this
1039
1040        expressions = self._parse_csv(_parse_ttl_action)
1041        where = self._parse_where()
1042        group = self._parse_group()
1043
1044        aggregates = None
1045        if group and self._match(TokenType.SET):
1046            aggregates = self._parse_csv(self._parse_set_item)
1047
1048        return self.expression(
1049            exp.MergeTreeTTL,
1050            expressions=expressions,
1051            where=where,
1052            group=group,
1053            aggregates=aggregates,
1054        )
1055
1056    def _parse_statement(self) -> t.Optional[exp.Expression]:
1057        if self._curr is None:
1058            return None
1059
1060        if self._match_set(self.STATEMENT_PARSERS):
1061            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1062
1063        if self._match_set(Tokenizer.COMMANDS):
1064            return self._parse_command()
1065
1066        expression = self._parse_expression()
1067        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1068        return self._parse_query_modifiers(expression)
1069
1070    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1071        start = self._prev
1072        temporary = self._match(TokenType.TEMPORARY)
1073        materialized = self._match_text_seq("MATERIALIZED")
1074        kind = self._match_set(self.CREATABLES) and self._prev.text
1075        if not kind:
1076            return self._parse_as_command(start)
1077
1078        return self.expression(
1079            exp.Drop,
1080            exists=self._parse_exists(),
1081            this=self._parse_table(schema=True),
1082            kind=kind,
1083            temporary=temporary,
1084            materialized=materialized,
1085            cascade=self._match_text_seq("CASCADE"),
1086            constraints=self._match_text_seq("CONSTRAINTS"),
1087            purge=self._match_text_seq("PURGE"),
1088        )
1089
1090    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1091        return (
1092            self._match(TokenType.IF)
1093            and (not not_ or self._match(TokenType.NOT))
1094            and self._match(TokenType.EXISTS)
1095        )
1096
1097    def _parse_create(self) -> t.Optional[exp.Expression]:
1098        start = self._prev
1099        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1100            TokenType.OR, TokenType.REPLACE
1101        )
1102        unique = self._match(TokenType.UNIQUE)
1103
1104        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1105            self._match(TokenType.TABLE)
1106
1107        properties = None
1108        create_token = self._match_set(self.CREATABLES) and self._prev
1109
1110        if not create_token:
1111            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1112            create_token = self._match_set(self.CREATABLES) and self._prev
1113
1114            if not properties or not create_token:
1115                return self._parse_as_command(start)
1116
1117        exists = self._parse_exists(not_=True)
1118        this = None
1119        expression = None
1120        indexes = None
1121        no_schema_binding = None
1122        begin = None
1123        clone = None
1124
1125        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1126            this = self._parse_user_defined_function(kind=create_token.token_type)
1127            temp_properties = self._parse_properties()
1128            if properties and temp_properties:
1129                properties.expressions.extend(temp_properties.expressions)
1130            elif temp_properties:
1131                properties = temp_properties
1132
1133            self._match(TokenType.ALIAS)
1134            begin = self._match(TokenType.BEGIN)
1135            return_ = self._match_text_seq("RETURN")
1136            expression = self._parse_statement()
1137
1138            if return_:
1139                expression = self.expression(exp.Return, this=expression)
1140        elif create_token.token_type == TokenType.INDEX:
1141            this = self._parse_index()
1142        elif create_token.token_type in self.DB_CREATABLES:
1143            table_parts = self._parse_table_parts(schema=True)
1144
1145            # exp.Properties.Location.POST_NAME
1146            if self._match(TokenType.COMMA):
1147                temp_properties = self._parse_properties(before=True)
1148                if properties and temp_properties:
1149                    properties.expressions.extend(temp_properties.expressions)
1150                elif temp_properties:
1151                    properties = temp_properties
1152
1153            this = self._parse_schema(this=table_parts)
1154
1155            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1156            temp_properties = self._parse_properties()
1157            if properties and temp_properties:
1158                properties.expressions.extend(temp_properties.expressions)
1159            elif temp_properties:
1160                properties = temp_properties
1161
1162            self._match(TokenType.ALIAS)
1163
1164            # exp.Properties.Location.POST_ALIAS
1165            if not (
1166                self._match(TokenType.SELECT, advance=False)
1167                or self._match(TokenType.WITH, advance=False)
1168                or self._match(TokenType.L_PAREN, advance=False)
1169            ):
1170                temp_properties = self._parse_properties()
1171                if properties and temp_properties:
1172                    properties.expressions.extend(temp_properties.expressions)
1173                elif temp_properties:
1174                    properties = temp_properties
1175
1176            expression = self._parse_ddl_select()
1177
1178            if create_token.token_type == TokenType.TABLE:
1179                indexes = []
1180                while True:
1181                    index = self._parse_create_table_index()
1182
1183                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1184                    temp_properties = self._parse_properties()
1185                    if properties and temp_properties:
1186                        properties.expressions.extend(temp_properties.expressions)
1187                    elif temp_properties:
1188                        properties = temp_properties
1189
1190                    if not index:
1191                        break
1192                    else:
1193                        self._match(TokenType.COMMA)
1194                        indexes.append(index)
1195            elif create_token.token_type == TokenType.VIEW:
1196                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1197                    no_schema_binding = True
1198
1199            if self._match_text_seq("CLONE"):
1200                clone = self._parse_table(schema=True)
1201                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1202                clone_kind = (
1203                    self._match(TokenType.L_PAREN)
1204                    and self._match_texts(self.CLONE_KINDS)
1205                    and self._prev.text.upper()
1206                )
1207                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1208                self._match(TokenType.R_PAREN)
1209                clone = self.expression(
1210                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1211                )
1212
1213        return self.expression(
1214            exp.Create,
1215            this=this,
1216            kind=create_token.text,
1217            replace=replace,
1218            unique=unique,
1219            expression=expression,
1220            exists=exists,
1221            properties=properties,
1222            indexes=indexes,
1223            no_schema_binding=no_schema_binding,
1224            begin=begin,
1225            clone=clone,
1226        )
1227
1228    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1229        # only used for teradata currently
1230        self._match(TokenType.COMMA)
1231
1232        kwargs = {
1233            "no": self._match_text_seq("NO"),
1234            "dual": self._match_text_seq("DUAL"),
1235            "before": self._match_text_seq("BEFORE"),
1236            "default": self._match_text_seq("DEFAULT"),
1237            "local": (self._match_text_seq("LOCAL") and "LOCAL")
1238            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
1239            "after": self._match_text_seq("AFTER"),
1240            "minimum": self._match_texts(("MIN", "MINIMUM")),
1241            "maximum": self._match_texts(("MAX", "MAXIMUM")),
1242        }
1243
1244        if self._match_texts(self.PROPERTY_PARSERS):
1245            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
1246            try:
1247                return parser(self, **{k: v for k, v in kwargs.items() if v})
1248            except TypeError:
1249                self.raise_error(f"Cannot parse property '{self._prev.text}'")
1250
1251        return None
1252
1253    def _parse_property(self) -> t.Optional[exp.Expression]:
1254        if self._match_texts(self.PROPERTY_PARSERS):
1255            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1256
1257        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1258            return self._parse_character_set(default=True)
1259
1260        if self._match_text_seq("COMPOUND", "SORTKEY"):
1261            return self._parse_sortkey(compound=True)
1262
1263        if self._match_text_seq("SQL", "SECURITY"):
1264            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1265
1266        assignment = self._match_pair(
1267            TokenType.VAR, TokenType.EQ, advance=False
1268        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1269
1270        if assignment:
1271            key = self._parse_var_or_string()
1272            self._match(TokenType.EQ)
1273            return self.expression(exp.Property, this=key, value=self._parse_column())
1274
1275        return None
1276
1277    def _parse_stored(self) -> exp.Expression:
1278        self._match(TokenType.ALIAS)
1279
1280        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1281        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1282
1283        return self.expression(
1284            exp.FileFormatProperty,
1285            this=self.expression(
1286                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1287            )
1288            if input_format or output_format
1289            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1290        )
1291
1292    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1293        self._match(TokenType.EQ)
1294        self._match(TokenType.ALIAS)
1295        return self.expression(exp_class, this=self._parse_field())
1296
1297    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1298        properties = []
1299
1300        while True:
1301            if before:
1302                prop = self._parse_property_before()
1303            else:
1304                prop = self._parse_property()
1305
1306            if not prop:
1307                break
1308            for p in ensure_list(prop):
1309                properties.append(p)
1310
1311        if properties:
1312            return self.expression(exp.Properties, expressions=properties)
1313
1314        return None
1315
1316    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1317        return self.expression(
1318            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1319        )
1320
1321    def _parse_volatile_property(self) -> exp.Expression:
1322        if self._index >= 2:
1323            pre_volatile_token = self._tokens[self._index - 2]
1324        else:
1325            pre_volatile_token = None
1326
1327        if pre_volatile_token and pre_volatile_token.token_type in (
1328            TokenType.CREATE,
1329            TokenType.REPLACE,
1330            TokenType.UNIQUE,
1331        ):
1332            return exp.VolatileProperty()
1333
1334        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1335
1336    def _parse_with_property(
1337        self,
1338    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1339        self._match(TokenType.WITH)
1340        if self._match(TokenType.L_PAREN, advance=False):
1341            return self._parse_wrapped_csv(self._parse_property)
1342
1343        if self._match_text_seq("JOURNAL"):
1344            return self._parse_withjournaltable()
1345
1346        if self._match_text_seq("DATA"):
1347            return self._parse_withdata(no=False)
1348        elif self._match_text_seq("NO", "DATA"):
1349            return self._parse_withdata(no=True)
1350
1351        if not self._next:
1352            return None
1353
1354        return self._parse_withisolatedloading()
1355
1356    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1357    def _parse_definer(self) -> t.Optional[exp.Expression]:
1358        self._match(TokenType.EQ)
1359
1360        user = self._parse_id_var()
1361        self._match(TokenType.PARAMETER)
1362        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1363
1364        if not user or not host:
1365            return None
1366
1367        return exp.DefinerProperty(this=f"{user}@{host}")
1368
1369    def _parse_withjournaltable(self) -> exp.Expression:
1370        self._match(TokenType.TABLE)
1371        self._match(TokenType.EQ)
1372        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1373
1374    def _parse_log(self, no: bool = False) -> exp.Expression:
1375        return self.expression(exp.LogProperty, no=no)
1376
1377    def _parse_journal(self, **kwargs) -> exp.Expression:
1378        return self.expression(exp.JournalProperty, **kwargs)
1379
1380    def _parse_checksum(self) -> exp.Expression:
1381        self._match(TokenType.EQ)
1382
1383        on = None
1384        if self._match(TokenType.ON):
1385            on = True
1386        elif self._match_text_seq("OFF"):
1387            on = False
1388        default = self._match(TokenType.DEFAULT)
1389
1390        return self.expression(
1391            exp.ChecksumProperty,
1392            on=on,
1393            default=default,
1394        )
1395
1396    def _parse_cluster(self) -> t.Optional[exp.Expression]:
1397        if not self._match_text_seq("BY"):
1398            self._retreat(self._index - 1)
1399            return None
1400        return self.expression(
1401            exp.Cluster,
1402            expressions=self._parse_csv(self._parse_ordered),
1403        )
1404
1405    def _parse_freespace(self) -> exp.Expression:
1406        self._match(TokenType.EQ)
1407        return self.expression(
1408            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1409        )
1410
1411    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1412        if self._match(TokenType.EQ):
1413            return self.expression(
1414                exp.MergeBlockRatioProperty,
1415                this=self._parse_number(),
1416                percent=self._match(TokenType.PERCENT),
1417            )
1418        return self.expression(
1419            exp.MergeBlockRatioProperty,
1420            no=no,
1421            default=default,
1422        )
1423
1424    def _parse_datablocksize(
1425        self,
1426        default: t.Optional[bool] = None,
1427        minimum: t.Optional[bool] = None,
1428        maximum: t.Optional[bool] = None,
1429    ) -> exp.Expression:
1430        self._match(TokenType.EQ)
1431        size = self._parse_number()
1432        units = None
1433        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1434            units = self._prev.text
1435        return self.expression(
1436            exp.DataBlocksizeProperty,
1437            size=size,
1438            units=units,
1439            default=default,
1440            minimum=minimum,
1441            maximum=maximum,
1442        )
1443
1444    def _parse_blockcompression(self) -> exp.Expression:
1445        self._match(TokenType.EQ)
1446        always = self._match_text_seq("ALWAYS")
1447        manual = self._match_text_seq("MANUAL")
1448        never = self._match_text_seq("NEVER")
1449        default = self._match_text_seq("DEFAULT")
1450        autotemp = None
1451        if self._match_text_seq("AUTOTEMP"):
1452            autotemp = self._parse_schema()
1453
1454        return self.expression(
1455            exp.BlockCompressionProperty,
1456            always=always,
1457            manual=manual,
1458            never=never,
1459            default=default,
1460            autotemp=autotemp,
1461        )
1462
1463    def _parse_withisolatedloading(self) -> exp.Expression:
1464        no = self._match_text_seq("NO")
1465        concurrent = self._match_text_seq("CONCURRENT")
1466        self._match_text_seq("ISOLATED", "LOADING")
1467        for_all = self._match_text_seq("FOR", "ALL")
1468        for_insert = self._match_text_seq("FOR", "INSERT")
1469        for_none = self._match_text_seq("FOR", "NONE")
1470        return self.expression(
1471            exp.IsolatedLoadingProperty,
1472            no=no,
1473            concurrent=concurrent,
1474            for_all=for_all,
1475            for_insert=for_insert,
1476            for_none=for_none,
1477        )
1478
1479    def _parse_locking(self) -> exp.Expression:
1480        if self._match(TokenType.TABLE):
1481            kind = "TABLE"
1482        elif self._match(TokenType.VIEW):
1483            kind = "VIEW"
1484        elif self._match(TokenType.ROW):
1485            kind = "ROW"
1486        elif self._match_text_seq("DATABASE"):
1487            kind = "DATABASE"
1488        else:
1489            kind = None
1490
1491        if kind in ("DATABASE", "TABLE", "VIEW"):
1492            this = self._parse_table_parts()
1493        else:
1494            this = None
1495
1496        if self._match(TokenType.FOR):
1497            for_or_in = "FOR"
1498        elif self._match(TokenType.IN):
1499            for_or_in = "IN"
1500        else:
1501            for_or_in = None
1502
1503        if self._match_text_seq("ACCESS"):
1504            lock_type = "ACCESS"
1505        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1506            lock_type = "EXCLUSIVE"
1507        elif self._match_text_seq("SHARE"):
1508            lock_type = "SHARE"
1509        elif self._match_text_seq("READ"):
1510            lock_type = "READ"
1511        elif self._match_text_seq("WRITE"):
1512            lock_type = "WRITE"
1513        elif self._match_text_seq("CHECKSUM"):
1514            lock_type = "CHECKSUM"
1515        else:
1516            lock_type = None
1517
1518        override = self._match_text_seq("OVERRIDE")
1519
1520        return self.expression(
1521            exp.LockingProperty,
1522            this=this,
1523            kind=kind,
1524            for_or_in=for_or_in,
1525            lock_type=lock_type,
1526            override=override,
1527        )
1528
1529    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1530        if self._match(TokenType.PARTITION_BY):
1531            return self._parse_csv(self._parse_conjunction)
1532        return []
1533
1534    def _parse_partitioned_by(self) -> exp.Expression:
1535        self._match(TokenType.EQ)
1536        return self.expression(
1537            exp.PartitionedByProperty,
1538            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1539        )
1540
1541    def _parse_withdata(self, no=False) -> exp.Expression:
1542        if self._match_text_seq("AND", "STATISTICS"):
1543            statistics = True
1544        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1545            statistics = False
1546        else:
1547            statistics = None
1548
1549        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1550
1551    def _parse_no_property(self) -> t.Optional[exp.Property]:
1552        if self._match_text_seq("PRIMARY", "INDEX"):
1553            return exp.NoPrimaryIndexProperty()
1554        return None
1555
1556    def _parse_on_property(self) -> t.Optional[exp.Property]:
1557        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1558            return exp.OnCommitProperty()
1559        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1560            return exp.OnCommitProperty(delete=True)
1561        return None
1562
1563    def _parse_distkey(self) -> exp.Expression:
1564        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1565
    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse a CREATE ... LIKE <table> clause, including any trailing
        INCLUDING/EXCLUDING <option> modifiers.

        Returns None when an INCLUDING/EXCLUDING keyword is not followed by an
        identifier (tokens consumed up to that point are not restored).
        """
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            # The INCLUDING/EXCLUDING keyword that was just matched.
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)
1584
1585    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1586        return self.expression(
1587            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1588        )
1589
1590    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1591        self._match(TokenType.EQ)
1592        return self.expression(
1593            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1594        )
1595
    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause for a function/procedure definition.

        Handles scalar return types, TABLE (col type, ...) forms, and the
        angle-bracketed TABLE<col type, ...> form.
        """
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...>: parse struct fields, require closing '>'.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # TABLE (col type, ...) form.
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1615
1616    def _parse_describe(self) -> exp.Expression:
1617        kind = self._match_set(self.CREATABLES) and self._prev.text
1618        this = self._parse_table()
1619
1620        return self.expression(exp.Describe, this=this, kind=kind)
1621
    def _parse_insert(self) -> exp.Expression:
        """Parse the body of an INSERT statement.

        Supports both directory targets (INSERT [OVERWRITE] [LOCAL] DIRECTORY)
        and table targets (INSERT [OR <alternative>] INTO [TABLE] <table>).
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Target is a directory path rather than a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # INSERT OR <alternative> (e.g. REPLACE/IGNORE, per INSERT_ALTERNATIVES).
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # Keyword argument order below matters: each value consumes tokens.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1653
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an ON CONFLICT or ON DUPLICATE KEY clause.

        Returns None when neither introducer is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                # Conflict target is a named constraint.
                constraint = self._parse_id_var()
            else:
                # Conflict target is a list of values/columns.
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET <assignments>
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1687
1688    def _parse_returning(self) -> t.Optional[exp.Expression]:
1689        if not self._match(TokenType.RETURNING):
1690            return None
1691
1692        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1693
1694    def _parse_row(self) -> t.Optional[exp.Expression]:
1695        if not self._match(TokenType.FORMAT):
1696            return None
1697        return self._parse_row_format()
1698
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT property, in either SERDE or DELIMITED form.

        Args:
            match_row: when True, require a leading ROW FORMAT token pair and
                return None if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause below is optional; they are consumed in this order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1724
    def _parse_load(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...

        Any LOAD variant that is not LOAD DATA is kept verbatim as an opaque
        command node.
        """
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # Keyword order matters: each value consumes tokens in sequence.
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        # Fall back: re-parse starting from the previous token as a command.
        return self._parse_as_command(self._prev)
1744
1745    def _parse_delete(self) -> exp.Expression:
1746        self._match(TokenType.FROM)
1747
1748        return self.expression(
1749            exp.Delete,
1750            this=self._parse_table(),
1751            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1752            where=self._parse_where(),
1753            returning=self._parse_returning(),
1754        )
1755
1756    def _parse_update(self) -> exp.Expression:
1757        return self.expression(
1758            exp.Update,
1759            **{  # type: ignore
1760                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1761                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1762                "from": self._parse_from(modifiers=True),
1763                "where": self._parse_where(),
1764                "returning": self._parse_returning(),
1765            },
1766        )
1767
1768    def _parse_uncache(self) -> exp.Expression:
1769        if not self._match(TokenType.TABLE):
1770            self.raise_error("Expecting TABLE after UNCACHE")
1771
1772        return self.expression(
1773            exp.Uncache,
1774            exists=self._parse_exists(),
1775            this=self._parse_table(schema=True),
1776        )
1777
    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' pair is parsed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )
1800
1801    def _parse_partition(self) -> t.Optional[exp.Expression]:
1802        if not self._match(TokenType.PARTITION):
1803            return None
1804
1805        return self.expression(
1806            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1807        )
1808
1809    def _parse_value(self) -> exp.Expression:
1810        if self._match(TokenType.L_PAREN):
1811            expressions = self._parse_csv(self._parse_conjunction)
1812            self._match_r_paren()
1813            return self.expression(exp.Tuple, expressions=expressions)
1814
1815        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1816        # Source: https://prestodb.io/docs/current/sql/values.html
1817        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1818
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: CTE-prefixed statement, SELECT body,
        parenthesized subquery, or VALUES list.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (with set operations applied), or None.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE style modifiers.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1898
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its list of CTEs.

        Args:
            skip_with_token: when True, assume the WITH keyword has already
                been consumed by the caller.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs may be separated by a comma or (tolerantly) another WITH;
            # a WITH after a comma is also consumed.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1918
1919    def _parse_cte(self) -> exp.Expression:
1920        alias = self._parse_table_alias()
1921        if not alias or not alias.this:
1922            self.raise_error("Expected CTE to have alias")
1923
1924        self._match(TokenType.ALIAS)
1925
1926        return self.expression(
1927            exp.CTE,
1928            this=self._parse_wrapped(self._parse_statement),
1929            alias=alias,
1930        )
1931
1932    def _parse_table_alias(
1933        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1934    ) -> t.Optional[exp.Expression]:
1935        any_token = self._match(TokenType.ALIAS)
1936        alias = (
1937            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1938            or self._parse_string_as_identifier()
1939        )
1940
1941        index = self._index
1942        if self._match(TokenType.L_PAREN):
1943            columns = self._parse_csv(self._parse_function_parameter)
1944            self._match_r_paren() if columns else self._retreat(index)
1945        else:
1946            columns = None
1947
1948        if not alias and not columns:
1949            return None
1950
1951        return self.expression(exp.TableAlias, this=alias, columns=columns)
1952
1953    def _parse_subquery(
1954        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1955    ) -> exp.Expression:
1956        return self.expression(
1957            exp.Subquery,
1958            this=this,
1959            pivots=self._parse_pivots(),
1960            alias=self._parse_table_alias() if parse_alias else None,
1961        )
1962
1963    def _parse_query_modifiers(
1964        self, this: t.Optional[exp.Expression]
1965    ) -> t.Optional[exp.Expression]:
1966        if isinstance(this, self.MODIFIABLES):
1967            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1968                expression = parser(self)
1969
1970                if expression:
1971                    this.set(key, expression)
1972        return this
1973
1974    def _parse_hint(self) -> t.Optional[exp.Expression]:
1975        if self._match(TokenType.HINT):
1976            hints = self._parse_csv(self._parse_function)
1977            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1978                self.raise_error("Expected */ after HINT")
1979            return self.expression(exp.Hint, expressions=hints)
1980
1981        return None
1982
1983    def _parse_into(self) -> t.Optional[exp.Expression]:
1984        if not self._match(TokenType.INTO):
1985            return None
1986
1987        temp = self._match(TokenType.TEMPORARY)
1988        unlogged = self._match_text_seq("UNLOGGED")
1989        self._match(TokenType.TABLE)
1990
1991        return self.expression(
1992            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1993        )
1994
1995    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
1996        if not self._match(TokenType.FROM):
1997            return None
1998
1999        comments = self._prev_comments
2000        this = self._parse_table()
2001
2002        return self.expression(
2003            exp.From,
2004            comments=comments,
2005            this=self._parse_query_modifiers(this) if modifiers else this,
2006        )
2007
2008    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2009        if not self._match(TokenType.MATCH_RECOGNIZE):
2010            return None
2011
2012        self._match_l_paren()
2013
2014        partition = self._parse_partition_by()
2015        order = self._parse_order()
2016        measures = (
2017            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2018        )
2019
2020        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2021            rows = exp.Var(this="ONE ROW PER MATCH")
2022        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2023            text = "ALL ROWS PER MATCH"
2024            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2025                text += f" SHOW EMPTY MATCHES"
2026            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2027                text += f" OMIT EMPTY MATCHES"
2028            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2029                text += f" WITH UNMATCHED ROWS"
2030            rows = exp.Var(this=text)
2031        else:
2032            rows = None
2033
2034        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2035            text = "AFTER MATCH SKIP"
2036            if self._match_text_seq("PAST", "LAST", "ROW"):
2037                text += f" PAST LAST ROW"
2038            elif self._match_text_seq("TO", "NEXT", "ROW"):
2039                text += f" TO NEXT ROW"
2040            elif self._match_text_seq("TO", "FIRST"):
2041                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2042            elif self._match_text_seq("TO", "LAST"):
2043                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2044            after = exp.Var(this=text)
2045        else:
2046            after = None
2047
2048        if self._match_text_seq("PATTERN"):
2049            self._match_l_paren()
2050
2051            if not self._curr:
2052                self.raise_error("Expecting )", self._curr)
2053
2054            paren = 1
2055            start = self._curr
2056
2057            while self._curr and paren > 0:
2058                if self._curr.token_type == TokenType.L_PAREN:
2059                    paren += 1
2060                if self._curr.token_type == TokenType.R_PAREN:
2061                    paren -= 1
2062                end = self._prev
2063                self._advance()
2064            if paren > 0:
2065                self.raise_error("Expecting )", self._curr)
2066            pattern = exp.Var(this=self._find_sql(start, end))
2067        else:
2068            pattern = None
2069
2070        define = (
2071            self._parse_csv(
2072                lambda: self.expression(
2073                    exp.Alias,
2074                    alias=self._parse_id_var(any_token=True),
2075                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2076                )
2077            )
2078            if self._match_text_seq("DEFINE")
2079            else None
2080        )
2081
2082        self._match_r_paren()
2083
2084        return self.expression(
2085            exp.MatchRecognize,
2086            partition_by=partition,
2087            order=order,
2088            measures=measures,
2089            rows=rows,
2090            after=after,
2091            pattern=pattern,
2092            define=define,
2093            alias=self._parse_table_alias(),
2094        )
2095
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / OUTER APPLY / CROSS APPLY constructs.

        Returns None if none of the three introducers is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY behaves as an outer lateral; CROSS APPLY does not.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: table name followed by AS <column list>.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2137
2138    def _parse_join_side_and_kind(
2139        self,
2140    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2141        return (
2142            self._match(TokenType.NATURAL) and self._prev,
2143            self._match_set(self.JOIN_SIDES) and self._prev,
2144            self._match_set(self.JOIN_KINDS) and self._prev,
2145        )
2146
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause: comma join, [NATURAL] [side] [kind] JOIN, or
        OUTER/CROSS APPLY.

        Args:
            skip_join_token: when True, the JOIN keyword itself is optional.
        """
        if self._match(TokenType.COMMA):
            # Implicit comma join.
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: undo the speculative side/kind consumption.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2190
2191    def _parse_index(self) -> exp.Expression:
2192        index = self._parse_id_var()
2193        self._match(TokenType.ON)
2194        self._match(TokenType.TABLE)  # hive
2195
2196        return self.expression(
2197            exp.Index,
2198            this=index,
2199            table=self.expression(exp.Table, this=self._parse_id_var()),
2200            columns=self._parse_expression(),
2201        )
2202
2203    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2204        unique = self._match(TokenType.UNIQUE)
2205        primary = self._match_text_seq("PRIMARY")
2206        amp = self._match_text_seq("AMP")
2207        if not self._match(TokenType.INDEX):
2208            return None
2209        index = self._parse_id_var()
2210        columns = None
2211        if self._match(TokenType.L_PAREN, advance=False):
2212            columns = self._parse_wrapped_csv(self._parse_column)
2213        return self.expression(
2214            exp.Index,
2215            this=index,
2216            columns=columns,
2217            unique=unique,
2218            primary=primary,
2219            amp=amp,
2220        )
2221
2222    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2223        return (
2224            (not schema and self._parse_function())
2225            or self._parse_id_var(any_token=False)
2226            or self._parse_string_as_identifier()
2227            or self._parse_placeholder()
2228        )
2229
    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.part...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift previously seen parts up one qualification level.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2252
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain
        (possibly aliased/sampled/pivoted/hinted) table reference.

        Args:
            schema: parse the result as a schema (column definitions allowed).
            alias_tokens: token types permitted as an alias identifier.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this: exp.Expression = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Dialect flag: some dialects put TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints: WITH (<hint>, ...).
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it samples.
            table_sample.set("this", this)
            this = table_sample

        return this
2304
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table factor, with optional WITH ORDINALITY,
        alias, and WITH OFFSET clauses."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialect flag: the alias names the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default offset column name when no explicit alias is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2331
2332    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2333        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2334        if not is_derived and not self._match(TokenType.VALUES):
2335            return None
2336
2337        expressions = self._parse_csv(self._parse_value)
2338
2339        if is_derived:
2340            self._match_r_paren()
2341
2342        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2343
2344    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2345        if not self._match(TokenType.TABLE_SAMPLE) and not (
2346            as_modifier and self._match_text_seq("USING", "SAMPLE")
2347        ):
2348            return None
2349
2350        bucket_numerator = None
2351        bucket_denominator = None
2352        bucket_field = None
2353        percent = None
2354        rows = None
2355        size = None
2356        seed = None
2357
2358        kind = (
2359            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2360        )
2361        method = self._parse_var(tokens=(TokenType.ROW,))
2362
2363        self._match(TokenType.L_PAREN)
2364
2365        num = self._parse_number()
2366
2367        if self._match_text_seq("BUCKET"):
2368            bucket_numerator = self._parse_number()
2369            self._match_text_seq("OUT", "OF")
2370            bucket_denominator = bucket_denominator = self._parse_number()
2371            self._match(TokenType.ON)
2372            bucket_field = self._parse_field()
2373        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2374            percent = num
2375        elif self._match(TokenType.ROWS):
2376            rows = num
2377        else:
2378            size = num
2379
2380        self._match(TokenType.R_PAREN)
2381
2382        if self._match(TokenType.L_PAREN):
2383            method = self._parse_var()
2384            seed = self._match(TokenType.COMMA) and self._parse_number()
2385            self._match_r_paren()
2386        elif self._match_texts(("SEED", "REPEATABLE")):
2387            seed = self._parse_wrapped(self._parse_number)
2388
2389        return self.expression(
2390            exp.TableSample,
2391            method=method,
2392            bucket_numerator=bucket_numerator,
2393            bucket_denominator=bucket_denominator,
2394            bucket_field=bucket_field,
2395            percent=percent,
2396            rows=rows,
2397            size=size,
2398            seed=seed,
2399            kind=kind,
2400        )
2401
2402    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2403        return list(iter(self._parse_pivot, None))
2404
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause; return None when neither keyword follows.

        For PIVOT, also synthesizes the implicit output column names by combining
        each aggregation's alias with each value in the IN list.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # The keyword must be followed by a parenthesized body; otherwise rewind.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # alias=True lets IN-list entries carry aliases, used for column naming below.
        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias when this is the last PIVOT/UNPIVOT in a chain.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # One implicit output column per (IN value, aggregation name) pair; prefix
            # order is dialect-dependent (PREFIXED_PIVOT_COLUMNS).
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2464
2465    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2466        return [agg.alias for agg in aggregations]
2467
2468    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2469        if not skip_where_token and not self._match(TokenType.WHERE):
2470            return None
2471
2472        return self.expression(
2473            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2474        )
2475
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop so mixed forms like `GROUP BY a, ROLLUP (b), GROUPING SETS (...)` are
        # all collected; the loop ends once no modifier matched in a full pass.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # Distinguish `WITH ROLLUP`/`WITH CUBE` (no args) from `ROLLUP (...)`/`CUBE (...)`.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2512
2513    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2514        if not self._match(TokenType.GROUPING_SETS):
2515            return None
2516
2517        return self._parse_wrapped_csv(self._parse_grouping_set)
2518
2519    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2520        if self._match(TokenType.L_PAREN):
2521            grouping_set = self._parse_csv(self._parse_column)
2522            self._match_r_paren()
2523            return self.expression(exp.Tuple, expressions=grouping_set)
2524
2525        return self._parse_column()
2526
2527    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2528        if not skip_having_token and not self._match(TokenType.HAVING):
2529            return None
2530        return self.expression(exp.Having, this=self._parse_conjunction())
2531
2532    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2533        if not self._match(TokenType.QUALIFY):
2534            return None
2535        return self.expression(exp.Qualify, this=self._parse_conjunction())
2536
2537    def _parse_order(
2538        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2539    ) -> t.Optional[exp.Expression]:
2540        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2541            return this
2542
2543        return self.expression(
2544            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2545        )
2546
2547    def _parse_sort(
2548        self, exp_class: t.Type[exp.Expression], *texts: str
2549    ) -> t.Optional[exp.Expression]:
2550        if not self._match_text_seq(*texts):
2551            return None
2552        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2553
2554    def _parse_ordered(self) -> exp.Expression:
2555        this = self._parse_conjunction()
2556        self._match(TokenType.ASC)
2557        is_desc = self._match(TokenType.DESC)
2558        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2559        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2560        desc = is_desc or False
2561        asc = not desc
2562        nulls_first = is_nulls_first or False
2563        explicitly_null_ordered = is_nulls_first or is_nulls_last
2564        if (
2565            not explicitly_null_ordered
2566            and (
2567                (asc and self.null_ordering == "nulls_are_small")
2568                or (desc and self.null_ordering != "nulls_are_small")
2569            )
2570            and self.null_ordering != "nulls_are_last"
2571        ):
2572            nulls_first = True
2573
2574        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2575
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`) or a FETCH clause; otherwise return `this`."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # The limit argument may itself be parenthesized, e.g. LIMIT (1).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH { FIRST | NEXT } <count> [PERCENT] { ROW | ROWS } { ONLY | WITH TIES }
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2614
2615    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2616        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2617            return this
2618
2619        count = self._parse_number()
2620        self._match_set((TokenType.ROW, TokenType.ROWS))
2621        return self.expression(exp.Offset, this=this, expression=count)
2622
2623    def _parse_locks(self) -> t.List[exp.Expression]:
2624        # Lists are invariant, so we need to use a type hint here
2625        locks: t.List[exp.Expression] = []
2626
2627        while True:
2628            if self._match_text_seq("FOR", "UPDATE"):
2629                update = True
2630            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2631                "LOCK", "IN", "SHARE", "MODE"
2632            ):
2633                update = False
2634            else:
2635                break
2636
2637            expressions = None
2638            if self._match_text_seq("OF"):
2639                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2640
2641            wait: t.Optional[bool | exp.Expression] = None
2642            if self._match_text_seq("NOWAIT"):
2643                wait = True
2644            elif self._match_text_seq("WAIT"):
2645                wait = self._parse_primary()
2646            elif self._match_text_seq("SKIP", "LOCKED"):
2647                wait = False
2648
2649            locks.append(
2650                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2651            )
2652
2653        return locks
2654
2655    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2656        if not self._match_set(self.SET_OPERATIONS):
2657            return this
2658
2659        token_type = self._prev.token_type
2660
2661        if token_type == TokenType.UNION:
2662            expression = exp.Union
2663        elif token_type == TokenType.EXCEPT:
2664            expression = exp.Except
2665        else:
2666            expression = exp.Intersect
2667
2668        return self.expression(
2669            expression,
2670            this=this,
2671            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2672            expression=self._parse_set_operations(self._parse_select(nested=True)),
2673        )
2674
2675    def _parse_expression(self) -> t.Optional[exp.Expression]:
2676        return self._parse_alias(self._parse_conjunction())
2677
2678    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2679        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2680
2681    def _parse_equality(self) -> t.Optional[exp.Expression]:
2682        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2683
2684    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2685        return self._parse_tokens(self._parse_range, self.COMPARISON)
2686
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-level predicates: BETWEEN/IN/LIKE-style operators, ISNULL/NOTNULL, IS."""
        this = self._parse_bitwise()
        # A leading NOT here negates the whole range predicate, e.g. NOT BETWEEN / NOT IN.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2713
2714    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2715        index = self._index - 1
2716        negate = self._match(TokenType.NOT)
2717        if self._match_text_seq("DISTINCT", "FROM"):
2718            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2719            return self.expression(klass, this=this, expression=self._parse_expression())
2720
2721        expression = self._parse_null() or self._parse_boolean()
2722        if not expression:
2723            self._retreat(index)
2724            return None
2725
2726        this = self.expression(exp.Is, this=this, expression=expression)
2727        return self.expression(exp.Not, this=this) if negate else this
2728
2729    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
2730        unnest = self._parse_unnest()
2731        if unnest:
2732            this = self.expression(exp.In, this=this, unnest=unnest)
2733        elif self._match(TokenType.L_PAREN):
2734            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
2735
2736            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2737                this = self.expression(exp.In, this=this, query=expressions[0])
2738            else:
2739                this = self.expression(exp.In, this=this, expressions=expressions)
2740
2741            self._match_r_paren(this)
2742        else:
2743            this = self.expression(exp.In, this=this, field=self._parse_field())
2744
2745        return this
2746
2747    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2748        low = self._parse_bitwise()
2749        self._match(TokenType.AND)
2750        high = self._parse_bitwise()
2751        return self.expression(exp.Between, this=this, low=low, high=high)
2752
2753    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2754        if not self._match(TokenType.ESCAPE):
2755            return this
2756        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2757
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing toward `INTERVAL '<n>' <unit>`."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2778
2779    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2780        this = self._parse_term()
2781
2782        while True:
2783            if self._match_set(self.BITWISE):
2784                this = self.expression(
2785                    self.BITWISE[self._prev.token_type],
2786                    this=this,
2787                    expression=self._parse_term(),
2788                )
2789            elif self._match_pair(TokenType.LT, TokenType.LT):
2790                this = self.expression(
2791                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2792                )
2793            elif self._match_pair(TokenType.GT, TokenType.GT):
2794                this = self.expression(
2795                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2796                )
2797            else:
2798                break
2799
2800        return this
2801
2802    def _parse_term(self) -> t.Optional[exp.Expression]:
2803        return self._parse_tokens(self._parse_factor, self.TERM)
2804
2805    def _parse_factor(self) -> t.Optional[exp.Expression]:
2806        return self._parse_tokens(self._parse_unary, self.FACTOR)
2807
2808    def _parse_unary(self) -> t.Optional[exp.Expression]:
2809        if self._match_set(self.UNARY_PARSERS):
2810            return self.UNARY_PARSERS[self._prev.token_type](self)
2811        return self._parse_at_time_zone(self._parse_type())
2812
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast, a bare data type, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            # `<type> <literal>` (e.g. DATE '2020-01-01') parses as a cast, possibly via
            # a dialect-specific literal parser.
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            # An unparameterized type token here is ambiguous with a column name; rewind
            # and re-parse as a column instead.
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2834
2835    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2836        this = self._parse_type()
2837        if not this:
2838            return None
2839
2840        return self.expression(
2841            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2842        )
2843
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested/parameterized types, arrays and timestamps.

        When `check_func` is True, a type token with parenthesized arguments is
        treated as a potential function call and the match may be undone so the
        caller can re-parse it as a function or column.
        """
        index = self._index

        # Teradata allows types qualified with the SYSUDTLIB schema prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized arguments mean this may actually be a function call.
            maybe_func = True

        # Postgres-style array suffixes: int[], int[][], ...
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` after the type token means this wasn't a type after all.
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, wrapped in [] or ().
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto concrete timestamp types.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a concrete timestamp type was chosen, it can't be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            # No string literal follows, so e.g. DATE(x) is really a function call;
            # undo the whole match and let the caller re-parse it.
            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2957
2958    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2959        this = self._parse_type() or self._parse_id_var()
2960        self._match(TokenType.COLON)
2961        return self._parse_column_def(this)
2962
2963    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2964        if not self._match_text_seq("AT", "TIME", "ZONE"):
2965            return this
2966        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2967
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including `::` casts, bracket
        access, and dot-chained qualifiers."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators like JSON arrows take the next raw token as a literal.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dot was consumed: shift qualifiers one level deeper
                # (this -> table, table -> db, db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3020
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a `.5`-style number, or a parenthesized
        expression/subquery/tuple. Returns None when nothing matches."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single concatenation.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # Numbers written without a leading zero, e.g. `.5`.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3064
3065    def _parse_field(
3066        self,
3067        any_token: bool = False,
3068        tokens: t.Optional[t.Collection[TokenType]] = None,
3069    ) -> t.Optional[exp.Expression]:
3070        return (
3071            self._parse_primary()
3072            or self._parse_function()
3073            or self._parse_id_var(any_token=any_token, tokens=tokens)
3074        )
3075
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, bypass known-function builders and produce exp.Anonymous.

        Returns None when the upcoming tokens cannot start a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed like functions but written without parens (e.g. CASE).
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No paren follows: only zero-argument keyword functions qualify.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            # Subquery predicates like EXISTS/ANY wrapping a SELECT.
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown (or forced-anonymous) functions are kept as-is by name.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3127
3128    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3129        return self._parse_column_def(self._parse_id_var())
3130
3131    def _parse_user_defined_function(
3132        self, kind: t.Optional[TokenType] = None
3133    ) -> t.Optional[exp.Expression]:
3134        this = self._parse_id_var()
3135
3136        while self._match(TokenType.DOT):
3137            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3138
3139        if not self._match(TokenType.L_PAREN):
3140            return this
3141
3142        expressions = self._parse_csv(self._parse_function_parameter)
3143        self._match_r_paren()
3144        return self.expression(
3145            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3146        )
3147
3148    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3149        literal = self._parse_primary()
3150        if literal:
3151            return self.expression(exp.Introducer, this=token.text, expression=literal)
3152
3153        return self.expression(exp.Identifier, this=token.text)
3154
3155    def _parse_national(self, token: Token) -> exp.Expression:
3156        return self.expression(exp.National, this=exp.Literal.string(token.text))
3157
3158    def _parse_session_parameter(self) -> exp.Expression:
3159        kind = None
3160        this = self._parse_id_var() or self._parse_primary()
3161
3162        if this and self._match(TokenType.DOT):
3163            kind = this.name
3164            this = self._parse_var() or self._parse_primary()
3165
3166        return self.expression(exp.SessionParameter, this=this, kind=kind)
3167
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression (e.g. `x -> x + 1`), falling back to a plain
        (optionally DISTINCT) expression when no lambda operator follows.

        Args:
            alias: whether an alias is allowed in the fallback expression.
        """
        # Remember where we started so we can backtrack if this isn't a lambda.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            # A lambda operator follows: delegate to the dialect's builder.
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Rewrite the column on the left of `name = value` into a Var —
                    # presumably a named/keyword argument; TODO confirm with callers.
                    left.replace(exp.Var(this=left.text("this")))

        # Function arguments may carry trailing modifiers (nulls handling, ORDER BY, LIMIT).
        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3199
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        If what follows is actually a nested SELECT, or no L_PAREN at all, return
        `this` unchanged.
        """
        index = self._index

        try:
            # Probe: if a nested SELECT parses here, this isn't a schema.
            if self._parse_select(nested=True):
                return this
        except Exception:
            # NOTE(review): broad except looks deliberate — a failed probe just
            # means "not a SELECT"; verify no real errors are being masked.
            pass
        finally:
            # The probe only peeks: always rewind to where we started.
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        # Each entry is either a constraint or a column definition.
        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3220
3221    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3222        # column defs are not really columns, they're identifiers
3223        if isinstance(this, exp.Column):
3224            this = this.this
3225        kind = self._parse_types()
3226
3227        if self._match_text_seq("FOR", "ORDINALITY"):
3228            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3229
3230        constraints = []
3231        while True:
3232            constraint = self._parse_column_constraint()
3233            if not constraint:
3234                break
3235            constraints.append(constraint)
3236
3237        if not kind and not constraints:
3238            return this
3239
3240        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3241
3242    def _parse_auto_increment(self) -> exp.Expression:
3243        start = None
3244        increment = None
3245
3246        if self._match(TokenType.L_PAREN, advance=False):
3247            args = self._parse_wrapped_csv(self._parse_bitwise)
3248            start = seq_get(args, 0)
3249            increment = seq_get(args, 1)
3250        elif self._match_text_seq("START"):
3251            start = self._parse_bitwise()
3252            self._match_text_seq("INCREMENT")
3253            increment = self._parse_bitwise()
3254
3255        if start and increment:
3256            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3257
3258        return exp.AutoIncrementColumnConstraint()
3259
3260    def _parse_compress(self) -> exp.Expression:
3261        if self._match(TokenType.L_PAREN, advance=False):
3262            return self.expression(
3263                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3264            )
3265
3266        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3267
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY | (<expr>)} along
        with the optional parenthesized sequence options.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            # GENERATED BY DEFAULT [ON NULL] AS ... -> this=False
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            # GENERATED [ALWAYS] AS ... -> this=True
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        # Whether the IDENTITY keyword was present (vs. a computed expression).
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options, e.g. (START WITH 1 INCREMENT BY 1 ... [NO] CYCLE)
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: the parentheses hold a computed expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3302
3303    def _parse_inline(self) -> t.Optional[exp.Expression]:
3304        self._match_text_seq("LENGTH")
3305        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3306
3307    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3308        if self._match_text_seq("NULL"):
3309            return self.expression(exp.NotNullColumnConstraint)
3310        if self._match_text_seq("CASESPECIFIC"):
3311            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3312        return None
3313
3314    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3315        if self._match(TokenType.CONSTRAINT):
3316            this = self._parse_id_var()
3317        else:
3318            this = None
3319
3320        if self._match_texts(self.CONSTRAINT_PARSERS):
3321            return self.expression(
3322                exp.ColumnConstraint,
3323                this=this,
3324                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3325            )
3326
3327        return this
3328
3329    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3330        if not self._match(TokenType.CONSTRAINT):
3331            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3332
3333        this = self._parse_id_var()
3334        expressions = []
3335
3336        while True:
3337            constraint = self._parse_unnamed_constraint() or self._parse_function()
3338            if not constraint:
3339                break
3340            expressions.append(constraint)
3341
3342        return self.expression(exp.Constraint, this=this, expressions=expressions)
3343
3344    def _parse_unnamed_constraint(
3345        self, constraints: t.Optional[t.Collection[str]] = None
3346    ) -> t.Optional[exp.Expression]:
3347        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3348            return None
3349
3350        constraint = self._prev.text.upper()
3351        if constraint not in self.CONSTRAINT_PARSERS:
3352            self.raise_error(f"No parser found for schema constraint {constraint}.")
3353
3354        return self.CONSTRAINT_PARSERS[constraint](self)
3355
3356    def _parse_unique(self) -> exp.Expression:
3357        if not self._match(TokenType.L_PAREN, advance=False):
3358            return self.expression(exp.UniqueColumnConstraint)
3359        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3360
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings, in parse order.
        """
        options = []
        while True:
            if not self._curr:
                # Ran out of tokens.
                break

            if self._match(TokenType.ON):
                action = None
                # The word right after ON (e.g. DELETE or UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: the options list ends here.
                break

        return options
3397
3398    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3399        if match and not self._match(TokenType.REFERENCES):
3400            return None
3401
3402        expressions = None
3403        this = self._parse_id_var()
3404
3405        if self._match(TokenType.L_PAREN, advance=False):
3406            expressions = self._parse_wrapped_id_vars()
3407
3408        options = self._parse_key_constraint_options()
3409        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3410
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON {DELETE|UPDATE} <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # "delete" / "update" becomes the kwarg name on the ForeignKey node.
            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # NOTE(review): if neither NULL nor DEFAULT follows SET, _prev is
                # still the SET token, yielding "SET SET" — presumably unreachable
                # for valid SQL; verify against the grammar.
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token is taken verbatim as the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3436
3437    def _parse_primary_key(self) -> exp.Expression:
3438        desc = (
3439            self._match_set((TokenType.ASC, TokenType.DESC))
3440            and self._prev.token_type == TokenType.DESC
3441        )
3442
3443        if not self._match(TokenType.L_PAREN, advance=False):
3444            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3445
3446        expressions = self._parse_wrapped_csv(self._parse_field)
3447        options = self._parse_key_constraint_options()
3448        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3449
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literals / subscripts `[...]` and
        brace-delimited struct literals `{...}`. Recurses to chain subscripts.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading-colon slice, e.g. x[:y]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            # No receiver (or the ARRAY keyword): this is an array literal.
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # A subscript: shift indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        # Require the matching closer for whichever opener we saw.
        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)
3478
3479    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3480        if self._match(TokenType.COLON):
3481            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3482        return this
3483
3484    def _parse_case(self) -> t.Optional[exp.Expression]:
3485        ifs = []
3486        default = None
3487
3488        expression = self._parse_conjunction()
3489
3490        while self._match(TokenType.WHEN):
3491            this = self._parse_conjunction()
3492            self._match(TokenType.THEN)
3493            then = self._parse_conjunction()
3494            ifs.append(self.expression(exp.If, this=this, true=then))
3495
3496        if self._match(TokenType.ELSE):
3497            default = self._parse_conjunction()
3498
3499        if not self._match(TokenType.END):
3500            self.raise_error("Expected END after CASE", self._prev)
3501
3502        return self._parse_window(
3503            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3504        )
3505
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call `IF(cond, true[, false])` or as the
        keyword form `IF cond THEN true [ELSE false] END`.
        """
        if self._match(TokenType.L_PAREN):
            # Function-call form.
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # Keyword form; `index` points back at the IF token for backtracking.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # No condition parsed: this wasn't an IF expression after all.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3527
3528    def _parse_extract(self) -> exp.Expression:
3529        this = self._parse_function() or self._parse_var() or self._parse_type()
3530
3531        if self._match(TokenType.FROM):
3532            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3533
3534        if not self._match(TokenType.COMMA):
3535            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3536
3537        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3538
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type) / TRY_CAST(...).

        Args:
            strict: build an exp.Cast when True, an exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                # May not raise depending on the error level; then we fall through.
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CAST(x AS CHAR CHARACTER SET cs)
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3559
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into an exp.GroupConcat,
        normalizing the WITHIN GROUP (ORDER BY ...) form along the way.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            # No WITHIN GROUP: rewind past the consumed R_PAREN and finish here.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3588
3589    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3590        to: t.Optional[exp.Expression]
3591        this = self._parse_bitwise()
3592
3593        if self._match(TokenType.USING):
3594            to = self.expression(exp.CharacterSet, this=self._parse_var())
3595        elif self._match(TokenType.COMMA):
3596            to = self._parse_bitwise()
3597        else:
3598            to = None
3599
3600        # Swap the argument order if needed to produce the correct AST
3601        if self.CONVERT_TYPE_FIRST:
3602            this, to = to, this
3603
3604        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3605
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # A literal NULL must be matched with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: also count both sides being NULL as a
                # match, since plain equality would yield NULL there.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3652
3653    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3654        self._match_text_seq("KEY")
3655        key = self._parse_field()
3656        self._match(TokenType.COLON)
3657        self._match_text_seq("VALUE")
3658        value = self._parse_field()
3659        if not key and not value:
3660            return None
3661        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3662
3663    def _parse_json_object(self) -> exp.Expression:
3664        expressions = self._parse_csv(self._parse_json_key_value)
3665
3666        null_handling = None
3667        if self._match_text_seq("NULL", "ON", "NULL"):
3668            null_handling = "NULL ON NULL"
3669        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3670            null_handling = "ABSENT ON NULL"
3671
3672        unique_keys = None
3673        if self._match_text_seq("WITH", "UNIQUE"):
3674            unique_keys = True
3675        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3676            unique_keys = False
3677
3678        self._match_text_seq("KEYS")
3679
3680        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3681        format_json = self._match_text_seq("FORMAT", "JSON")
3682        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3683
3684        return self.expression(
3685            exp.JSONObject,
3686            expressions=expressions,
3687            null_handling=null_handling,
3688            unique_keys=unique_keys,
3689            return_type=return_type,
3690            format_json=format_json,
3691            encoding=encoding,
3692        )
3693
3694    def _parse_logarithm(self) -> exp.Expression:
3695        # Default argument order is base, expression
3696        args = self._parse_csv(self._parse_range)
3697
3698        if len(args) > 1:
3699            if not self.LOG_BASE_FIRST:
3700                args.reverse()
3701            return exp.Log.from_arg_list(args)
3702
3703        return self.expression(
3704            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3705        )
3706
    def _parse_match_against(self) -> exp.Expression:
        """Parse MATCH (<cols>) AGAINST (<string> [<search modifier>])."""
        expressions = self._parse_csv(self._parse_column)

        # Consume the column list's closing paren and AGAINST's opening paren.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        # The optional search modifier, longest alternatives first.
        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3728
3729    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # The WITH clause sits outside OPENJSON's closing paren.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3749
3750    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3751        args = self._parse_csv(self._parse_bitwise)
3752
3753        if self._match(TokenType.IN):
3754            return self.expression(
3755                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3756            )
3757
3758        if haystack_first:
3759            haystack = seq_get(args, 0)
3760            needle = seq_get(args, 1)
3761        else:
3762            needle = seq_get(args, 0)
3763            haystack = seq_get(args, 1)
3764
3765        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3766
3767        self.validate_expression(this, args)
3768
3769        return this
3770
3771    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3772        args = self._parse_csv(self._parse_table)
3773        return exp.JoinHint(this=func_name.upper(), expressions=args)
3774
3775    def _parse_substring(self) -> exp.Expression:
3776        # Postgres supports the form: substring(string [from int] [for int])
3777        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3778
3779        args = self._parse_csv(self._parse_bitwise)
3780
3781        if self._match(TokenType.FROM):
3782            args.append(self._parse_bitwise())
3783            if self._match(TokenType.FOR):
3784                args.append(self._parse_bitwise())
3785
3786        this = exp.Substring.from_arg_list(args)
3787        self.validate_expression(this, args)
3788
3789        return this
3790
3791    def _parse_struct(self) -> exp.Struct:
3792        return exp.Struct.from_arg_list(self._parse_csv(lambda: self._parse_lambda(alias=True)))
3793
3794    def _parse_trim(self) -> exp.Expression:
3795        # https://www.w3resource.com/sql/character-functions/trim.php
3796        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3797
3798        position = None
3799        collation = None
3800
3801        if self._match_texts(self.TRIM_TYPES):
3802            position = self._prev.text.upper()
3803
3804        expression = self._parse_bitwise()
3805        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3806            this = self._parse_bitwise()
3807        else:
3808            this = expression
3809            expression = None
3810
3811        if self._match(TokenType.COLLATE):
3812            collation = self._parse_bitwise()
3813
3814        return self.expression(
3815            exp.Trim,
3816            this=this,
3817            position=position,
3818            expression=expression,
3819            collation=collation,
3820        )
3821
3822    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3823        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3824
3825    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3826        return self._parse_window(self._parse_id_var(), alias=True)
3827
3828    def _parse_respect_or_ignore_nulls(
3829        self, this: t.Optional[exp.Expression]
3830    ) -> t.Optional[exp.Expression]:
3831        if self._match_text_seq("IGNORE", "NULLS"):
3832            return self.expression(exp.IgnoreNulls, this=this)
3833        if self._match_text_seq("RESPECT", "NULLS"):
3834            return self.expression(exp.RespectNulls, this=this)
3835        return this
3836
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window-related suffixes of `this`: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and the OVER (...) specification.

        Args:
            alias: parse the named-window definition form `<name> AS (<spec>)`
                instead of requiring an OVER-like keyword.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window specification at all.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <name>: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FROM FIRST / FROM LAST (e.g. for NTH_VALUE).
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        # Frame kind: ROWS or RANGE.
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3919
3920    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3921        self._match(TokenType.BETWEEN)
3922
3923        return {
3924            "value": (
3925                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
3926                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
3927                or self._parse_bitwise()
3928            ),
3929            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
3930        }
3931
3932    def _parse_alias(
3933        self, this: t.Optional[exp.Expression], explicit: bool = False
3934    ) -> t.Optional[exp.Expression]:
3935        any_token = self._match(TokenType.ALIAS)
3936
3937        if explicit and not any_token:
3938            return this
3939
3940        if self._match(TokenType.L_PAREN):
3941            aliases = self.expression(
3942                exp.Aliases,
3943                this=this,
3944                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3945            )
3946            self._match_r_paren(aliases)
3947            return aliases
3948
3949        alias = self._parse_id_var(any_token)
3950
3951        if alias:
3952            return self.expression(exp.Alias, this=this, alias=alias)
3953
3954        return this
3955
3956    def _parse_id_var(
3957        self,
3958        any_token: bool = True,
3959        tokens: t.Optional[t.Collection[TokenType]] = None,
3960        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3961    ) -> t.Optional[exp.Expression]:
3962        identifier = self._parse_identifier()
3963
3964        if identifier:
3965            return identifier
3966
3967        prefix = ""
3968
3969        if prefix_tokens:
3970            while self._match_set(prefix_tokens):
3971                prefix += self._prev.text
3972
3973        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3974            quoted = self._prev.token_type == TokenType.STRING
3975            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3976
3977        return None
3978
3979    def _parse_string(self) -> t.Optional[exp.Expression]:
3980        if self._match(TokenType.STRING):
3981            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3982        return self._parse_placeholder()
3983
3984    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3985        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3986
3987    def _parse_number(self) -> t.Optional[exp.Expression]:
3988        if self._match(TokenType.NUMBER):
3989            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3990        return self._parse_placeholder()
3991
3992    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3993        if self._match(TokenType.IDENTIFIER):
3994            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3995        return self._parse_placeholder()
3996
3997    def _parse_var(
3998        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3999    ) -> t.Optional[exp.Expression]:
4000        if (
4001            (any_token and self._advance_any())
4002            or self._match(TokenType.VAR)
4003            or (self._match_set(tokens) if tokens else False)
4004        ):
4005            return self.expression(exp.Var, this=self._prev.text)
4006        return self._parse_placeholder()
4007
4008    def _advance_any(self) -> t.Optional[Token]:
4009        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4010            self._advance()
4011            return self._prev
4012        return None
4013
4014    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4015        return self._parse_var() or self._parse_string()
4016
4017    def _parse_null(self) -> t.Optional[exp.Expression]:
4018        if self._match(TokenType.NULL):
4019            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4020        return None
4021
4022    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4023        if self._match(TokenType.TRUE):
4024            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4025        if self._match(TokenType.FALSE):
4026            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4027        return None
4028
4029    def _parse_star(self) -> t.Optional[exp.Expression]:
4030        if self._match(TokenType.STAR):
4031            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4032        return None
4033
4034    def _parse_parameter(self) -> exp.Expression:
4035        wrapped = self._match(TokenType.L_BRACE)
4036        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4037        self._match(TokenType.R_BRACE)
4038        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4039
4040    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4041        if self._match_set(self.PLACEHOLDER_PARSERS):
4042            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4043            if placeholder:
4044                return placeholder
4045            self._advance(-1)
4046        return None
4047
4048    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4049        if not self._match(TokenType.EXCEPT):
4050            return None
4051        if self._match(TokenType.L_PAREN, advance=False):
4052            return self._parse_wrapped_csv(self._parse_column)
4053        return self._parse_csv(self._parse_column)
4054
4055    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4056        if not self._match(TokenType.REPLACE):
4057            return None
4058        if self._match(TokenType.L_PAREN, advance=False):
4059            return self._parse_wrapped_csv(self._parse_expression)
4060        return self._parse_csv(self._parse_expression)
4061
4062    def _parse_csv(
4063        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4064    ) -> t.List[t.Optional[exp.Expression]]:
4065        parse_result = parse_method()
4066        items = [parse_result] if parse_result is not None else []
4067
4068        while self._match(sep):
4069            self._add_comments(parse_result)
4070            parse_result = parse_method()
4071            if parse_result is not None:
4072                items.append(parse_result)
4073
4074        return items
4075
4076    def _parse_tokens(
4077        self, parse_method: t.Callable, expressions: t.Dict
4078    ) -> t.Optional[exp.Expression]:
4079        this = parse_method()
4080
4081        while self._match_set(expressions):
4082            this = self.expression(
4083                expressions[self._prev.token_type],
4084                this=this,
4085                comments=self._prev_comments,
4086                expression=parse_method(),
4087            )
4088
4089        return this
4090
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list; parens optional if `optional`."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4093
4094    def _parse_wrapped_csv(
4095        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4096    ) -> t.List[t.Optional[exp.Expression]]:
4097        return self._parse_wrapped(
4098            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4099        )
4100
4101    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4102        wrapped = self._match(TokenType.L_PAREN)
4103        if not wrapped and not optional:
4104            self.raise_error("Expecting (")
4105        parse_result = parse_method()
4106        if wrapped:
4107            self._match_r_paren()
4108        return parse_result
4109
4110    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4111        return self._parse_select() or self._parse_set_operations(
4112            self._parse_expression() if alias else self._parse_conjunction()
4113        )
4114
4115    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4116        return self._parse_set_operations(
4117            self._parse_select(nested=True, parse_subquery_alias=False)
4118        )
4119
4120    def _parse_transaction(self) -> exp.Expression:
4121        this = None
4122        if self._match_texts(self.TRANSACTION_KIND):
4123            this = self._prev.text
4124
4125        self._match_texts({"TRANSACTION", "WORK"})
4126
4127        modes = []
4128        while True:
4129            mode = []
4130            while self._match(TokenType.VAR):
4131                mode.append(self._prev.text)
4132
4133            if mode:
4134                modes.append(" ".join(mode))
4135            if not self._match(TokenType.COMMA):
4136                break
4137
4138        return self.expression(exp.Transaction, this=this, modes=modes)
4139
4140    def _parse_commit_or_rollback(self) -> exp.Expression:
4141        chain = None
4142        savepoint = None
4143        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4144
4145        self._match_texts({"TRANSACTION", "WORK"})
4146
4147        if self._match_text_seq("TO"):
4148            self._match_text_seq("SAVEPOINT")
4149            savepoint = self._parse_id_var()
4150
4151        if self._match(TokenType.AND):
4152            chain = not self._match_text_seq("NO")
4153            self._match_text_seq("CHAIN")
4154
4155        if is_rollback:
4156            return self.expression(exp.Rollback, savepoint=savepoint)
4157        return self.expression(exp.Commit, chain=chain)
4158
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ADD [COLUMN] action into a column definition."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        # Optional IF NOT EXISTS guard on the column itself.
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4179
4180    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4181        drop = self._match(TokenType.DROP) and self._parse_drop()
4182        if drop and not isinstance(drop, exp.Command):
4183            drop.set("kind", drop.args.get("kind", "COLUMN"))
4184        return drop
4185
4186    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4187    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4188        return self.expression(
4189            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4190        )
4191
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an ADD CONSTRAINT / ADD FOREIGN KEY / ADD PRIMARY KEY action."""
        this = None
        # The triggering token (CONSTRAINT, FOREIGN KEY, ...) was already consumed.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4215
4216    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4217        index = self._index - 1
4218
4219        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4220            return self._parse_csv(self._parse_add_constraint)
4221
4222        self._retreat(index)
4223        return self._parse_csv(self._parse_add_column)
4224
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] <col> <action>."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # [SET DATA] TYPE <type> [COLLATE ...] [USING ...]; each kwarg below only
        # parses its sub-expression when its introducing keyword is present.
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4242
4243    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4244        index = self._index - 1
4245
4246        partition_exists = self._parse_exists()
4247        if self._match(TokenType.PARTITION, advance=False):
4248            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4249
4250        self._retreat(index)
4251        return self._parse_csv(self._parse_drop_column)
4252
4253    def _parse_alter_table_rename(self) -> exp.Expression:
4254        self._match_text_seq("TO")
4255        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4256
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER statement; unknown forms degrade to a raw exp.Command."""
        start = self._prev

        # Only ALTER TABLE is parsed structurally.
        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        # The next keyword (ADD, DROP, ALTER, RENAME, ...) selects the action parser.
        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only accept the structured parse if it consumed every token.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4281
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE INTO ... USING ... ON ... WHEN ... statement."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            # WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] THEN <action>
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE is kept as a bare keyword node.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4347
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the dialect's SHOW_PARSERS trie."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        # Unknown SHOW target: consume one token and keep it verbatim.
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())
4354
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse `SET [kind] name = value` (or `... TO value`) into a SetItem."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment after all: rewind so the caller can recover.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4381
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristic, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the dict splat.
            **{"global": global_},  # type: ignore
        )
4393
    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring a dialect-specific sub-parser."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4397
    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; falls back to exp.Command on leftover tokens."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Unconsumed input means the structured parse failed: rewind and
            # keep the statement verbatim as a Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
4407
4408    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4409        for option in options:
4410            if self._match_text_seq(*option.split(" ")):
4411                return exp.Var(this=option)
4412        return None
4413
    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL in an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split off the leading keyword; everything after it is the raw payload.
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])
4420
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Greedily match upcoming tokens against a keyword trie.

        Returns the parser registered for the matched keyword sequence, or
        None (with the token position fully restored) on failure.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no registered keyword sequence starts this way.
                break
            if result == 2:
                # Complete match: the joined words name a registered parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4443
4444    def _match(self, token_type, advance=True, expression=None):
4445        if not self._curr:
4446            return None
4447
4448        if self._curr.token_type == token_type:
4449            if advance:
4450                self._advance()
4451            self._add_comments(expression)
4452            return True
4453
4454        return None
4455
4456    def _match_set(self, types, advance=True):
4457        if not self._curr:
4458            return None
4459
4460        if self._curr.token_type in types:
4461            if advance:
4462                self._advance()
4463            return True
4464
4465        return None
4466
4467    def _match_pair(self, token_type_a, token_type_b, advance=True):
4468        if not self._curr or not self._next:
4469            return None
4470
4471        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4472            if advance:
4473                self._advance(2)
4474            return True
4475
4476        return None
4477
4478    def _match_l_paren(self, expression=None):
4479        if not self._match(TokenType.L_PAREN, expression=expression):
4480            self.raise_error("Expecting (")
4481
4482    def _match_r_paren(self, expression=None):
4483        if not self._match(TokenType.R_PAREN, expression=expression):
4484            self.raise_error("Expecting )")
4485
4486    def _match_texts(self, texts, advance=True):
4487        if self._curr and self._curr.text.upper() in texts:
4488            if advance:
4489                self._advance()
4490            return True
4491        return False
4492
    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts, all-or-nothing.

        On any mismatch the token position is fully restored. With
        advance=False the match is a pure lookahead: the position is
        restored even on success.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4506
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then this column itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4521
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite column references to lambda parameters as plain identifiers/dots."""
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Walk up to the outermost Dot chain the column participates in
                # and replace that whole chain; the for-else handles the case
                # where the column is not nested inside a Dot at all.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: Sequence) -> sqlglot.expressions.Expression:
19def parse_var_map(args: t.Sequence) -> exp.Expression:
20    if len(args) == 1 and args[0].is_star:
21        return exp.StarMap(this=args[0])
22
23    keys = []
24    values = []
25    for i in range(0, len(args), 2):
26        keys.append(args[i])
27        values.append(args[i + 1])
28    return exp.VarMap(
29        keys=exp.Array(expressions=keys),
30        values=exp.Array(expressions=values),
31    )
def parse_like(args):
34def parse_like(args):
35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
39def binary_range_parser(
40    expr_type: t.Type[exp.Expression],
41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
42    return lambda self, this: self._parse_escape(
43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
44    )
class Parser:
  56class Parser(metaclass=_Parser):
  57    """
  58    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  59    a parsed syntax tree.
  60
  61    Args:
  62        error_level: the desired error level.
  63            Default: ErrorLevel.RAISE
  64        error_message_context: determines the amount of context to capture from a
  65            query string when displaying the error message (in number of characters).
  66            Default: 50.
  67        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  68            Default: 0
  69        alias_post_tablesample: If the table alias comes after tablesample.
  70            Default: False
  71        max_errors: Maximum number of error messages to include in a raised ParseError.
  72            This is only relevant if error_level is ErrorLevel.RAISE.
  73            Default: 3
  74        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  75            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  76            Default: "nulls_are_small"
  77    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
    # Tokens that introduce a subquery predicate, mapped to the expression
    # class each produces; SOME is parsed as a synonym of ANY.
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    # Tokens that can never be parsed as identifiers.
    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    # Object kinds creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # All object kinds accepted after CREATE / DROP / COMMENT ON.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that may be parsed as identifiers even though they double as
    # keywords in other contexts (see `_parse_id_var`).
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
 270
    # Identifier tokens allowed as an INTERVAL unit; END would be ambiguous here.
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens allowed as table aliases; the removed tokens would be
    # ambiguous with join/clause keywords that can follow a table reference.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # Alias tokens valid in COMMENT ON TABLE; IS introduces the comment string.
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # Alias tokens valid in UPDATE; SET starts the assignment list.
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    # Trim-position specifiers accepted by TRIM(...).
    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
 289
    # Tokens that may appear as a function name when followed by parentheses.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }
 325
    # Binary-operator token -> expression class tables, one per parser tier.

    # Logical connectives.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    # Equality comparisons (including the null-safe variant).
    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    # Ordering comparisons.
    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # Bitwise operators; DPIPE is string concatenation (||).
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    # Additive-tier operators.
    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    # Multiplicative-tier operators.
    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    # Time-typed tokens grouped for type parsing.
    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    # Tokens that combine two queries into one.
    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    # JOIN side qualifiers (LEFT/RIGHT/FULL).
    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    # JOIN kind qualifiers (INNER/OUTER/CROSS/SEMI/ANTI).
    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }
 391
    # Parsers for lambda-like constructs: `->` builds a Lambda whose parameter
    # names are substituted into the body via `_replace_lambda`; `=>` builds a
    # keyword argument (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators applicable to a column expression. DOT is handled
    # specially (None); DCOLON is a cast, honoring STRICT_CAST; the arrow/hash
    # operators are JSON/JSONB extraction; PLACEHOLDER (`?`) is JSONB contains.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
 441
    # Maps a target expression type to the parser method that produces it;
    # consulted by `parse_into` to parse a token stream into a specific type.
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Maps a statement's leading keyword token to its parse method.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
 498
    # Prefix (unary) operator parsers.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Parsers for literal/primary tokens (strings, numbers, star, NULL, ...).
    # Entries taking a `token` argument read the matched token's text.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Parsers for bind-parameter placeholders (?, @param, :name / :1).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Parsers for range/predicate operators (BETWEEN, IN, IS, LIKE family).
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
 548
    # Parsers for CREATE-statement properties, keyed by the property keyword's
    # uppercase text (multi-word keys are matched as phrases).
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER": lambda self: self._parse_cluster(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
 612
    # Parsers for column/table constraints, keyed by the constraint keyword's
    # uppercase text.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # Parsers for ALTER TABLE sub-commands, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }
 664
    # Constraint kinds that can appear in a schema definition without a name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without a parenthesized argument list.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions with special argument syntax, keyed by uppercase function name.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "STRUCT": lambda self: self._parse_struct(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Parsers for the clauses that can modify a query, keyed by the arg name
    # each result is stored under; `iter(..., None)` collects repeated clauses.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # Parsers for SET-statement variants, keyed by the scope/kind keyword.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hook: SHOW-statement parsers (empty in the base parser).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Dialect hook: parsers for type-specific literals (empty in the base parser).
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Transaction modes accepted after BEGIN.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Phrases accepted as SET TRANSACTION characteristics.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted in INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted as a CLONE specifier kind.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Identifier tokens usable as window names; ROWS would be ambiguous.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    # Tokens that may precede a parenthesized window specification.
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    # Tokens that can start an ALTER TABLE ADD constraint.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect flags, overridable by subclasses:
    # whether CAST errors on failure (vs TRY_CAST semantics).
    STRICT_CAST = True

    # whether CONVERT takes the type as its first argument.
    CONVERT_TYPE_FIRST = False

    # pivot-column naming behavior (see dialect overrides).
    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # argument order / default behavior of the LOG function.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
 754
    # Slots avoid a per-instance __dict__ and catch attribute typos early.
    __slots__ = (
        # configuration, set once in __init__
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        # per-parse cursor state, managed by reset()/_advance()
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        # tries built by the _Parser metaclass from SHOW_PARSERS/SET_PARSERS
        "_show_trie",
        "_set_trie",
    )
 774
 775    def __init__(
 776        self,
 777        error_level: t.Optional[ErrorLevel] = None,
 778        error_message_context: int = 100,
 779        index_offset: int = 0,
 780        unnest_column_only: bool = False,
 781        alias_post_tablesample: bool = False,
 782        max_errors: int = 3,
 783        null_ordering: t.Optional[str] = None,
 784    ):
 785        self.error_level = error_level or ErrorLevel.IMMEDIATE
 786        self.error_message_context = error_message_context
 787        self.index_offset = index_offset
 788        self.unnest_column_only = unnest_column_only
 789        self.alias_post_tablesample = alias_post_tablesample
 790        self.max_errors = max_errors
 791        self.null_ordering = null_ordering
 792        self.reset()
 793
 794    def reset(self):
 795        self.sql = ""
 796        self.errors = []
 797        self._tokens = []
 798        self._index = 0
 799        self._curr = None
 800        self._next = None
 801        self._prev = None
 802        self._prev_comments = None
 803
 804    def parse(
 805        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 806    ) -> t.List[t.Optional[exp.Expression]]:
 807        """
 808        Parses a list of tokens and returns a list of syntax trees, one tree
 809        per parsed SQL statement.
 810
 811        Args:
 812            raw_tokens: the list of tokens.
 813            sql: the original SQL string, used to produce helpful debug messages.
 814
 815        Returns:
 816            The list of syntax trees.
 817        """
 818        return self._parse(
 819            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 820        )
 821
 822    def parse_into(
 823        self,
 824        expression_types: exp.IntoType,
 825        raw_tokens: t.List[Token],
 826        sql: t.Optional[str] = None,
 827    ) -> t.List[t.Optional[exp.Expression]]:
 828        """
 829        Parses a list of tokens into a given Expression type. If a collection of Expression
 830        types is given instead, this method will try to parse the token list into each one
 831        of them, stopping at the first for which the parsing succeeds.
 832
 833        Args:
 834            expression_types: the expression type(s) to try and parse the token list into.
 835            raw_tokens: the list of tokens.
 836            sql: the original SQL string, used to produce helpful debug messages.
 837
 838        Returns:
 839            The target Expression.
 840        """
 841        errors = []
 842        for expression_type in ensure_collection(expression_types):
 843            parser = self.EXPRESSION_PARSERS.get(expression_type)
 844            if not parser:
 845                raise TypeError(f"No parser registered for {expression_type}")
 846            try:
 847                return self._parse(parser, raw_tokens, sql)
 848            except ParseError as e:
 849                e.errors[0]["into_expression"] = expression_type
 850                errors.append(e)
 851        raise ParseError(
 852            f"Failed to parse into {expression_types}",
 853            errors=merge_errors(errors),
 854        ) from errors[-1]
 855
 856    def _parse(
 857        self,
 858        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 859        raw_tokens: t.List[Token],
 860        sql: t.Optional[str] = None,
 861    ) -> t.List[t.Optional[exp.Expression]]:
 862        self.reset()
 863        self.sql = sql or ""
 864        total = len(raw_tokens)
 865        chunks: t.List[t.List[Token]] = [[]]
 866
 867        for i, token in enumerate(raw_tokens):
 868            if token.token_type == TokenType.SEMICOLON:
 869                if i < total - 1:
 870                    chunks.append([])
 871            else:
 872                chunks[-1].append(token)
 873
 874        expressions = []
 875
 876        for tokens in chunks:
 877            self._index = -1
 878            self._tokens = tokens
 879            self._advance()
 880
 881            expressions.append(parse_method(self))
 882
 883            if self._index < len(self._tokens):
 884                self.raise_error("Invalid expression / Unexpected token")
 885
 886            self.check_errors()
 887
 888        return expressions
 889
 890    def check_errors(self) -> None:
 891        """
 892        Logs or raises any found errors, depending on the chosen error level setting.
 893        """
 894        if self.error_level == ErrorLevel.WARN:
 895            for error in self.errors:
 896                logger.error(str(error))
 897        elif self.error_level == ErrorLevel.RAISE and self.errors:
 898            raise ParseError(
 899                concat_messages(self.errors, self.max_errors),
 900                errors=merge_errors(self.errors),
 901            )
 902
 903    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 904        """
 905        Appends an error in the list of recorded errors or raises it, depending on the chosen
 906        error level setting.
 907        """
 908        token = token or self._curr or self._prev or Token.string("")
 909        start = token.start
 910        end = token.end + 1
 911        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 912        highlight = self.sql[start:end]
 913        end_context = self.sql[end : end + self.error_message_context]
 914
 915        error = ParseError.new(
 916            f"{message}. Line {token.line}, Col: {token.col}.\n"
 917            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 918            description=message,
 919            line=token.line,
 920            col=token.col,
 921            start_context=start_context,
 922            highlight=highlight,
 923            end_context=end_context,
 924        )
 925
 926        if self.error_level == ErrorLevel.IMMEDIATE:
 927            raise error
 928
 929        self.errors.append(error)
 930
 931    def expression(
 932        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 933    ) -> E:
 934        """
 935        Creates a new, validated Expression.
 936
 937        Args:
 938            exp_class: the expression class to instantiate.
 939            comments: an optional list of comments to attach to the expression.
 940            kwargs: the arguments to set for the expression along with their respective values.
 941
 942        Returns:
 943            The target expression.
 944        """
 945        instance = exp_class(**kwargs)
 946        instance.add_comments(comments) if comments else self._add_comments(instance)
 947        self.validate_expression(instance)
 948        return instance
 949
 950    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 951        if expression and self._prev_comments:
 952            expression.add_comments(self._prev_comments)
 953            self._prev_comments = None
 954
 955    def validate_expression(
 956        self, expression: exp.Expression, args: t.Optional[t.List] = None
 957    ) -> None:
 958        """
 959        Validates an already instantiated expression, making sure that all its mandatory arguments
 960        are set.
 961
 962        Args:
 963            expression: the expression to validate.
 964            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 965        """
 966        if self.error_level == ErrorLevel.IGNORE:
 967            return
 968
 969        for error_message in expression.error_messages(args):
 970            self.raise_error(error_message)
 971
 972    def _find_sql(self, start: Token, end: Token) -> str:
 973        return self.sql[start.start : end.end + 1]
 974
 975    def _advance(self, times: int = 1) -> None:
 976        self._index += times
 977        self._curr = seq_get(self._tokens, self._index)
 978        self._next = seq_get(self._tokens, self._index + 1)
 979        if self._index > 0:
 980            self._prev = self._tokens[self._index - 1]
 981            self._prev_comments = self._prev.comments
 982        else:
 983            self._prev = None
 984            self._prev_comments = None
 985
 986    def _retreat(self, index: int) -> None:
 987        if index != self._index:
 988            self._advance(index - self._index)
 989
 990    def _parse_command(self) -> exp.Command:
 991        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
 992
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses a COMMENT [IF EXISTS] ON <kind> <name> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # The commented-on object kind must be a creatable (TABLE, COLUMN, ...);
        # otherwise fall back to treating the statement as a raw command.
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        # Parse the target name according to the object kind.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1018
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression optionally followed by an action:
            # DELETE, RECOMPRESS <expr>, TO DISK <name>, or TO VOLUME <name>.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare expression with no explicit action.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET <aggregations> is only consumed when a GROUP BY clause was parsed.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1056
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single top-level statement from the current token position."""
        if self._curr is None:
            return None

        # Dedicated statement parsers (CREATE, DROP, INSERT, ...) take precedence.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens registered as commands are parsed opaquely into exp.Command.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise try a bare expression, falling back to a SELECT; in either
        # case trailing query modifiers (WHERE, LIMIT, ...) are still consumed.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
1070
    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses a DROP statement; unknown object kinds become raw commands."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        # NOTE: keyword-argument order matters here -- the values are evaluated
        # left to right, consuming tokens in exactly this sequence.
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
1090
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS. The `and` chain short-circuits, so tokens are
        # only consumed while each preceding keyword matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1097
1098    def _parse_create(self) -> t.Optional[exp.Expression]:
1099        start = self._prev
1100        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1101            TokenType.OR, TokenType.REPLACE
1102        )
1103        unique = self._match(TokenType.UNIQUE)
1104
1105        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1106            self._match(TokenType.TABLE)
1107
1108        properties = None
1109        create_token = self._match_set(self.CREATABLES) and self._prev
1110
1111        if not create_token:
1112            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1113            create_token = self._match_set(self.CREATABLES) and self._prev
1114
1115            if not properties or not create_token:
1116                return self._parse_as_command(start)
1117
1118        exists = self._parse_exists(not_=True)
1119        this = None
1120        expression = None
1121        indexes = None
1122        no_schema_binding = None
1123        begin = None
1124        clone = None
1125
1126        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1127            this = self._parse_user_defined_function(kind=create_token.token_type)
1128            temp_properties = self._parse_properties()
1129            if properties and temp_properties:
1130                properties.expressions.extend(temp_properties.expressions)
1131            elif temp_properties:
1132                properties = temp_properties
1133
1134            self._match(TokenType.ALIAS)
1135            begin = self._match(TokenType.BEGIN)
1136            return_ = self._match_text_seq("RETURN")
1137            expression = self._parse_statement()
1138
1139            if return_:
1140                expression = self.expression(exp.Return, this=expression)
1141        elif create_token.token_type == TokenType.INDEX:
1142            this = self._parse_index()
1143        elif create_token.token_type in self.DB_CREATABLES:
1144            table_parts = self._parse_table_parts(schema=True)
1145
1146            # exp.Properties.Location.POST_NAME
1147            if self._match(TokenType.COMMA):
1148                temp_properties = self._parse_properties(before=True)
1149                if properties and temp_properties:
1150                    properties.expressions.extend(temp_properties.expressions)
1151                elif temp_properties:
1152                    properties = temp_properties
1153
1154            this = self._parse_schema(this=table_parts)
1155
1156            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1157            temp_properties = self._parse_properties()
1158            if properties and temp_properties:
1159                properties.expressions.extend(temp_properties.expressions)
1160            elif temp_properties:
1161                properties = temp_properties
1162
1163            self._match(TokenType.ALIAS)
1164
1165            # exp.Properties.Location.POST_ALIAS
1166            if not (
1167                self._match(TokenType.SELECT, advance=False)
1168                or self._match(TokenType.WITH, advance=False)
1169                or self._match(TokenType.L_PAREN, advance=False)
1170            ):
1171                temp_properties = self._parse_properties()
1172                if properties and temp_properties:
1173                    properties.expressions.extend(temp_properties.expressions)
1174                elif temp_properties:
1175                    properties = temp_properties
1176
1177            expression = self._parse_ddl_select()
1178
1179            if create_token.token_type == TokenType.TABLE:
1180                indexes = []
1181                while True:
1182                    index = self._parse_create_table_index()
1183
1184                    # exp.Properties.Location.POST_EXPRESSION or exp.Properties.Location.POST_INDEX
1185                    temp_properties = self._parse_properties()
1186                    if properties and temp_properties:
1187                        properties.expressions.extend(temp_properties.expressions)
1188                    elif temp_properties:
1189                        properties = temp_properties
1190
1191                    if not index:
1192                        break
1193                    else:
1194                        self._match(TokenType.COMMA)
1195                        indexes.append(index)
1196            elif create_token.token_type == TokenType.VIEW:
1197                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1198                    no_schema_binding = True
1199
1200            if self._match_text_seq("CLONE"):
1201                clone = self._parse_table(schema=True)
1202                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1203                clone_kind = (
1204                    self._match(TokenType.L_PAREN)
1205                    and self._match_texts(self.CLONE_KINDS)
1206                    and self._prev.text.upper()
1207                )
1208                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1209                self._match(TokenType.R_PAREN)
1210                clone = self.expression(
1211                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1212                )
1213
1214        return self.expression(
1215            exp.Create,
1216            this=this,
1217            kind=create_token.text,
1218            replace=replace,
1219            unique=unique,
1220            expression=expression,
1221            exists=exists,
1222            properties=properties,
1223            indexes=indexes,
1224            no_schema_binding=no_schema_binding,
1225            begin=begin,
1226            clone=clone,
1227        )
1228
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the optional modifier keywords that may precede the property
        # name; each match consumes its token(s) only when present.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Forward only the modifiers that were actually present; a parser
                # that doesn't accept one of them raises TypeError.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
1253
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single property of a DDL statement, if one is present."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property; look ahead without consuming tokens.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1277
1278    def _parse_stored(self) -> exp.Expression:
1279        self._match(TokenType.ALIAS)
1280
1281        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1282        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1283
1284        return self.expression(
1285            exp.FileFormatProperty,
1286            this=self.expression(
1287                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1288            )
1289            if input_format or output_format
1290            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1291        )
1292
1293    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1294        self._match(TokenType.EQ)
1295        self._match(TokenType.ALIAS)
1296        return self.expression(exp_class, this=self._parse_field())
1297
1298    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1299        properties = []
1300
1301        while True:
1302            if before:
1303                prop = self._parse_property_before()
1304            else:
1305                prop = self._parse_property()
1306
1307            if not prop:
1308                break
1309            for p in ensure_list(prop):
1310                properties.append(p)
1311
1312        if properties:
1313            return self.expression(exp.Properties, expressions=properties)
1314
1315        return None
1316
1317    def _parse_fallback(self, no: bool = False) -> exp.Expression:
1318        return self.expression(
1319            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1320        )
1321
1322    def _parse_volatile_property(self) -> exp.Expression:
1323        if self._index >= 2:
1324            pre_volatile_token = self._tokens[self._index - 2]
1325        else:
1326            pre_volatile_token = None
1327
1328        if pre_volatile_token and pre_volatile_token.token_type in (
1329            TokenType.CREATE,
1330            TokenType.REPLACE,
1331            TokenType.UNIQUE,
1332        ):
1333            return exp.VolatileProperty()
1334
1335        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1336
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parses the clause(s) following a WITH keyword in a DDL statement."""
        self._match(TokenType.WITH)
        # WITH (...) holds a parenthesized, comma-separated property list.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        # Fall back to Teradata's WITH [NO] [CONCURRENT] ISOLATED LOADING.
        return self._parse_withisolatedloading()
1356
1357    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1358    def _parse_definer(self) -> t.Optional[exp.Expression]:
1359        self._match(TokenType.EQ)
1360
1361        user = self._parse_id_var()
1362        self._match(TokenType.PARAMETER)
1363        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1364
1365        if not user or not host:
1366            return None
1367
1368        return exp.DefinerProperty(this=f"{user}@{host}")
1369
1370    def _parse_withjournaltable(self) -> exp.Expression:
1371        self._match(TokenType.TABLE)
1372        self._match(TokenType.EQ)
1373        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1374
    def _parse_log(self, no: bool = False) -> exp.Expression:
        # [NO] LOG (Teradata table option).
        return self.expression(exp.LogProperty, no=no)
1377
    def _parse_journal(self, **kwargs) -> exp.Expression:
        # Journal table option; modifier flags (no, dual, before, ...) arrive
        # via **kwargs from _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)
1380
1381    def _parse_checksum(self) -> exp.Expression:
1382        self._match(TokenType.EQ)
1383
1384        on = None
1385        if self._match(TokenType.ON):
1386            on = True
1387        elif self._match_text_seq("OFF"):
1388            on = False
1389        default = self._match(TokenType.DEFAULT)
1390
1391        return self.expression(
1392            exp.ChecksumProperty,
1393            on=on,
1394            default=default,
1395        )
1396
    def _parse_cluster(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("BY"):
            # Not CLUSTER BY: step back over the already-consumed CLUSTER token.
            self._retreat(self._index - 1)
            return None
        return self.expression(
            exp.Cluster,
            expressions=self._parse_csv(self._parse_ordered),
        )
1405
1406    def _parse_freespace(self) -> exp.Expression:
1407        self._match(TokenType.EQ)
1408        return self.expression(
1409            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1410        )
1411
1412    def _parse_mergeblockratio(self, no: bool = False, default: bool = False) -> exp.Expression:
1413        if self._match(TokenType.EQ):
1414            return self.expression(
1415                exp.MergeBlockRatioProperty,
1416                this=self._parse_number(),
1417                percent=self._match(TokenType.PERCENT),
1418            )
1419        return self.expression(
1420            exp.MergeBlockRatioProperty,
1421            no=no,
1422            default=default,
1423        )
1424
1425    def _parse_datablocksize(
1426        self,
1427        default: t.Optional[bool] = None,
1428        minimum: t.Optional[bool] = None,
1429        maximum: t.Optional[bool] = None,
1430    ) -> exp.Expression:
1431        self._match(TokenType.EQ)
1432        size = self._parse_number()
1433        units = None
1434        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1435            units = self._prev.text
1436        return self.expression(
1437            exp.DataBlocksizeProperty,
1438            size=size,
1439            units=units,
1440            default=default,
1441            minimum=minimum,
1442            maximum=maximum,
1443        )
1444
1445    def _parse_blockcompression(self) -> exp.Expression:
1446        self._match(TokenType.EQ)
1447        always = self._match_text_seq("ALWAYS")
1448        manual = self._match_text_seq("MANUAL")
1449        never = self._match_text_seq("NEVER")
1450        default = self._match_text_seq("DEFAULT")
1451        autotemp = None
1452        if self._match_text_seq("AUTOTEMP"):
1453            autotemp = self._parse_schema()
1454
1455        return self.expression(
1456            exp.BlockCompressionProperty,
1457            always=always,
1458            manual=manual,
1459            never=never,
1460            default=default,
1461            autotemp=autotemp,
1462        )
1463
    def _parse_withisolatedloading(self) -> exp.Expression:
        # Teradata: WITH [NO | CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1479
    def _parse_locking(self) -> exp.Expression:
        """Parses a Teradata LOCKING clause: object kind, name, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a (possibly qualified) name; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1529
1530    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1531        if self._match(TokenType.PARTITION_BY):
1532            return self._parse_csv(self._parse_conjunction)
1533        return []
1534
1535    def _parse_partitioned_by(self) -> exp.Expression:
1536        self._match(TokenType.EQ)
1537        return self.expression(
1538            exp.PartitionedByProperty,
1539            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1540        )
1541
1542    def _parse_withdata(self, no=False) -> exp.Expression:
1543        if self._match_text_seq("AND", "STATISTICS"):
1544            statistics = True
1545        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1546            statistics = False
1547        else:
1548            statistics = None
1549
1550        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1551
1552    def _parse_no_property(self) -> t.Optional[exp.Property]:
1553        if self._match_text_seq("PRIMARY", "INDEX"):
1554            return exp.NoPrimaryIndexProperty()
1555        return None
1556
1557    def _parse_on_property(self) -> t.Optional[exp.Property]:
1558        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
1559            return exp.OnCommitProperty()
1560        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
1561            return exp.OnCommitProperty(delete=True)
1562        return None
1563
    def _parse_distkey(self) -> exp.Expression:
        # Redshift: DISTKEY ( <column> )
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1566
1567    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1568        table = self._parse_table(schema=True)
1569        options = []
1570        while self._match_texts(("INCLUDING", "EXCLUDING")):
1571            this = self._prev.text.upper()
1572            id_var = self._parse_id_var()
1573
1574            if not id_var:
1575                return None
1576
1577            options.append(
1578                self.expression(
1579                    exp.Property,
1580                    this=this,
1581                    value=exp.Var(this=id_var.this.upper()),
1582                )
1583            )
1584        return self.expression(exp.LikeProperty, this=table, expressions=options)
1585
1586    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1587        return self.expression(
1588            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1589        )
1590
1591    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1592        self._match(TokenType.EQ)
1593        return self.expression(
1594            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1595        )
1596
    def _parse_returns(self) -> exp.Expression:
        """Parses a RETURNS clause of a function definition (scalar type or TABLE)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE<col type, ...> (angle-bracketed struct schema).
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                # RETURNS TABLE (col type, ...)
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            # Plain scalar return type.
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1616
1617    def _parse_describe(self) -> exp.Expression:
1618        kind = self._match_set(self.CREATABLES) and self._prev.text
1619        this = self._parse_table()
1620
1621        return self.expression(exp.Describe, this=this, kind=kind)
1622
    def _parse_insert(self) -> exp.Expression:
        """Parses an INSERT statement (the INSERT token is already consumed)."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. sqlite: INSERT OR {REPLACE | IGNORE | ...} INTO ...
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword-argument order matters -- values are evaluated left to
        # right, consuming tokens in exactly this sequence.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1654
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses ON CONFLICT (postgres/sqlite) or ON DUPLICATE KEY (mysql) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING, or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1688
1689    def _parse_returning(self) -> t.Optional[exp.Expression]:
1690        if not self._match(TokenType.RETURNING):
1691            return None
1692
1693        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1694
1695    def _parse_row(self) -> t.Optional[exp.Expression]:
1696        if not self._match(TokenType.FORMAT):
1697            return None
1698        return self._parse_row_format()
1699
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """
        Parses a Hive ROW FORMAT clause (SERDE or DELIMITED).

        Args:
            match_row: when True, require a leading ROW FORMAT token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional, but their order is fixed by the grammar.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1725
    def _parse_load(self) -> exp.Expression:
        """Parses a Hive LOAD DATA statement; other LOAD forms become raw commands."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            # NOTE: keyword-argument order matters -- values are evaluated left
            # to right, consuming tokens in exactly this sequence.
            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
1745
    def _parse_delete(self) -> exp.Expression:
        """Parses a DELETE statement (the DELETE token is already consumed)."""
        self._match(TokenType.FROM)

        # NOTE: keyword-argument order matters -- values are evaluated left to
        # right, consuming tokens in exactly this sequence.
        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )
1756
1757    def _parse_update(self) -> exp.Expression:
1758        return self.expression(
1759            exp.Update,
1760            **{  # type: ignore
1761                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1762                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1763                "from": self._parse_from(modifiers=True),
1764                "where": self._parse_where(),
1765                "returning": self._parse_returning(),
1766            },
1767        )
1768
1769    def _parse_uncache(self) -> exp.Expression:
1770        if not self._match(TokenType.TABLE):
1771            self.raise_error("Expecting TABLE after UNCACHE")
1772
1773        return self.expression(
1774            exp.Uncache,
1775            exists=self._parse_exists(),
1776            this=self._parse_table(schema=True),
1777        )
1778
1779    def _parse_cache(self) -> exp.Expression:
1780        lazy = self._match_text_seq("LAZY")
1781        self._match(TokenType.TABLE)
1782        table = self._parse_table(schema=True)
1783        options = []
1784
1785        if self._match_text_seq("OPTIONS"):
1786            self._match_l_paren()
1787            k = self._parse_string()
1788            self._match(TokenType.EQ)
1789            v = self._parse_string()
1790            options = [k, v]
1791            self._match_r_paren()
1792
1793        self._match(TokenType.ALIAS)
1794        return self.expression(
1795            exp.Cache,
1796            this=table,
1797            lazy=lazy,
1798            options=options,
1799            expression=self._parse_select(nested=True),
1800        )
1801
1802    def _parse_partition(self) -> t.Optional[exp.Expression]:
1803        if not self._match(TokenType.PARTITION):
1804            return None
1805
1806        return self.expression(
1807            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1808        )
1809
1810    def _parse_value(self) -> exp.Expression:
1811        if self._match(TokenType.L_PAREN):
1812            expressions = self._parse_csv(self._parse_conjunction)
1813            self._match_r_paren()
1814            return self.expression(exp.Tuple, expressions=expressions)
1815
1816        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1817        # Source: https://prestodb.io/docs/current/sql/values.html
1818        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1819
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH ... <stmt>, SELECT ..., (subquery) or VALUES.

        Args:
            nested: allow a parenthesized nested select at this position.
            table: allow a parenthesized bare table reference at this position.
            parse_subquery_alias: whether to parse a trailing alias after a
                parenthesized subquery.

        Returns:
            The parsed expression with any trailing set operations (UNION etc.)
            folded in, or None when nothing select-like is found.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may be non-fatal depending on error level, so a
                # fallback return value is still provided.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE -- captured verbatim as `kind`.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT may carry an ON (...) expression list.
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1899
1900    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1901        if not skip_with_token and not self._match(TokenType.WITH):
1902            return None
1903
1904        comments = self._prev_comments
1905        recursive = self._match(TokenType.RECURSIVE)
1906
1907        expressions = []
1908        while True:
1909            expressions.append(self._parse_cte())
1910
1911            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1912                break
1913            else:
1914                self._match(TokenType.WITH)
1915
1916        return self.expression(
1917            exp.With, comments=comments, expressions=expressions, recursive=recursive
1918        )
1919
1920    def _parse_cte(self) -> exp.Expression:
1921        alias = self._parse_table_alias()
1922        if not alias or not alias.this:
1923            self.raise_error("Expected CTE to have alias")
1924
1925        self._match(TokenType.ALIAS)
1926
1927        return self.expression(
1928            exp.CTE,
1929            this=self._parse_wrapped(self._parse_statement),
1930            alias=alias,
1931        )
1932
1933    def _parse_table_alias(
1934        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1935    ) -> t.Optional[exp.Expression]:
1936        any_token = self._match(TokenType.ALIAS)
1937        alias = (
1938            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1939            or self._parse_string_as_identifier()
1940        )
1941
1942        index = self._index
1943        if self._match(TokenType.L_PAREN):
1944            columns = self._parse_csv(self._parse_function_parameter)
1945            self._match_r_paren() if columns else self._retreat(index)
1946        else:
1947            columns = None
1948
1949        if not alias and not columns:
1950            return None
1951
1952        return self.expression(exp.TableAlias, this=alias, columns=columns)
1953
1954    def _parse_subquery(
1955        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1956    ) -> exp.Expression:
1957        return self.expression(
1958            exp.Subquery,
1959            this=this,
1960            pivots=self._parse_pivots(),
1961            alias=self._parse_table_alias() if parse_alias else None,
1962        )
1963
1964    def _parse_query_modifiers(
1965        self, this: t.Optional[exp.Expression]
1966    ) -> t.Optional[exp.Expression]:
1967        if isinstance(this, self.MODIFIABLES):
1968            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
1969                expression = parser(self)
1970
1971                if expression:
1972                    this.set(key, expression)
1973        return this
1974
1975    def _parse_hint(self) -> t.Optional[exp.Expression]:
1976        if self._match(TokenType.HINT):
1977            hints = self._parse_csv(self._parse_function)
1978            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
1979                self.raise_error("Expected */ after HINT")
1980            return self.expression(exp.Hint, expressions=hints)
1981
1982        return None
1983
1984    def _parse_into(self) -> t.Optional[exp.Expression]:
1985        if not self._match(TokenType.INTO):
1986            return None
1987
1988        temp = self._match(TokenType.TEMPORARY)
1989        unlogged = self._match_text_seq("UNLOGGED")
1990        self._match(TokenType.TABLE)
1991
1992        return self.expression(
1993            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
1994        )
1995
1996    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
1997        if not self._match(TokenType.FROM):
1998            return None
1999
2000        comments = self._prev_comments
2001        this = self._parse_table()
2002
2003        return self.expression(
2004            exp.From,
2005            comments=comments,
2006            this=self._parse_query_modifiers(this) if modifiers else this,
2007        )
2008
2009    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2010        if not self._match(TokenType.MATCH_RECOGNIZE):
2011            return None
2012
2013        self._match_l_paren()
2014
2015        partition = self._parse_partition_by()
2016        order = self._parse_order()
2017        measures = (
2018            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2019        )
2020
2021        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2022            rows = exp.Var(this="ONE ROW PER MATCH")
2023        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2024            text = "ALL ROWS PER MATCH"
2025            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2026                text += f" SHOW EMPTY MATCHES"
2027            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2028                text += f" OMIT EMPTY MATCHES"
2029            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2030                text += f" WITH UNMATCHED ROWS"
2031            rows = exp.Var(this=text)
2032        else:
2033            rows = None
2034
2035        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2036            text = "AFTER MATCH SKIP"
2037            if self._match_text_seq("PAST", "LAST", "ROW"):
2038                text += f" PAST LAST ROW"
2039            elif self._match_text_seq("TO", "NEXT", "ROW"):
2040                text += f" TO NEXT ROW"
2041            elif self._match_text_seq("TO", "FIRST"):
2042                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2043            elif self._match_text_seq("TO", "LAST"):
2044                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2045            after = exp.Var(this=text)
2046        else:
2047            after = None
2048
2049        if self._match_text_seq("PATTERN"):
2050            self._match_l_paren()
2051
2052            if not self._curr:
2053                self.raise_error("Expecting )", self._curr)
2054
2055            paren = 1
2056            start = self._curr
2057
2058            while self._curr and paren > 0:
2059                if self._curr.token_type == TokenType.L_PAREN:
2060                    paren += 1
2061                if self._curr.token_type == TokenType.R_PAREN:
2062                    paren -= 1
2063                end = self._prev
2064                self._advance()
2065            if paren > 0:
2066                self.raise_error("Expecting )", self._curr)
2067            pattern = exp.Var(this=self._find_sql(start, end))
2068        else:
2069            pattern = None
2070
2071        define = (
2072            self._parse_csv(
2073                lambda: self.expression(
2074                    exp.Alias,
2075                    alias=self._parse_id_var(any_token=True),
2076                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2077                )
2078            )
2079            if self._match_text_seq("DEFINE")
2080            else None
2081        )
2082
2083        self._match_r_paren()
2084
2085        return self.expression(
2086            exp.MatchRecognize,
2087            partition_by=partition,
2088            order=order,
2089            measures=measures,
2090            rows=rows,
2091            after=after,
2092            pattern=pattern,
2093            define=define,
2094            alias=self._parse_table_alias(),
2095        )
2096
2097    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2098        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2099        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2100
2101        if outer_apply or cross_apply:
2102            this = self._parse_select(table=True)
2103            view = None
2104            outer = not cross_apply
2105        elif self._match(TokenType.LATERAL):
2106            this = self._parse_select(table=True)
2107            view = self._match(TokenType.VIEW)
2108            outer = self._match(TokenType.OUTER)
2109        else:
2110            return None
2111
2112        if not this:
2113            this = self._parse_function() or self._parse_id_var(any_token=False)
2114            while self._match(TokenType.DOT):
2115                this = exp.Dot(
2116                    this=this,
2117                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2118                )
2119
2120        table_alias: t.Optional[exp.Expression]
2121
2122        if view:
2123            table = self._parse_id_var(any_token=False)
2124            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2125            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2126        else:
2127            table_alias = self._parse_table_alias()
2128
2129        expression = self.expression(
2130            exp.Lateral,
2131            this=this,
2132            view=view,
2133            outer=outer,
2134            alias=table_alias,
2135        )
2136
2137        return expression
2138
2139    def _parse_join_side_and_kind(
2140        self,
2141    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2142        return (
2143            self._match(TokenType.NATURAL) and self._prev,
2144            self._match_set(self.JOIN_SIDES) and self._prev,
2145            self._match_set(self.JOIN_KINDS) and self._prev,
2146        )
2147
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause: comma joins, NATURAL/side/kind prefixes, join
        hints, and CROSS/OUTER APPLY; returns None when no join follows."""
        if self._match(TokenType.COMMA):
            # Implicit comma join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The prefix tokens weren't part of a join after all; rewind and clear.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        # NOTE(review): the trailing False appears to make these non-consuming
        # checks (no token advance) -- confirm against _match_pair's signature.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is normalized to a LEFT-sided join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2191
2192    def _parse_index(self) -> exp.Expression:
2193        index = self._parse_id_var()
2194        self._match(TokenType.ON)
2195        self._match(TokenType.TABLE)  # hive
2196
2197        return self.expression(
2198            exp.Index,
2199            this=index,
2200            table=self.expression(exp.Table, this=self._parse_id_var()),
2201            columns=self._parse_expression(),
2202        )
2203
2204    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2205        unique = self._match(TokenType.UNIQUE)
2206        primary = self._match_text_seq("PRIMARY")
2207        amp = self._match_text_seq("AMP")
2208        if not self._match(TokenType.INDEX):
2209            return None
2210        index = self._parse_id_var()
2211        columns = None
2212        if self._match(TokenType.L_PAREN, advance=False):
2213            columns = self._parse_wrapped_csv(self._parse_column)
2214        return self.expression(
2215            exp.Index,
2216            this=index,
2217            columns=columns,
2218            unique=unique,
2219            primary=primary,
2220            amp=amp,
2221        )
2222
2223    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2224        return (
2225            (not schema and self._parse_function())
2226            or self._parse_id_var(any_token=False)
2227            or self._parse_string_as_identifier()
2228            or self._parse_placeholder()
2229        )
2230
2231    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
2232        catalog = None
2233        db = None
2234        table = self._parse_table_part(schema=schema)
2235
2236        while self._match(TokenType.DOT):
2237            if catalog:
2238                # This allows nesting the table in arbitrarily many dot expressions if needed
2239                table = self.expression(
2240                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2241                )
2242            else:
2243                catalog = db
2244                db = table
2245                table = self._parse_table_part(schema=schema)
2246
2247        if not table:
2248            self.raise_error(f"Expected table name but got {self._curr}")
2249
2250        return self.expression(
2251            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2252        )
2253
2254    def _parse_table(
2255        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
2256    ) -> t.Optional[exp.Expression]:
2257        lateral = self._parse_lateral()
2258        if lateral:
2259            return lateral
2260
2261        unnest = self._parse_unnest()
2262        if unnest:
2263            return unnest
2264
2265        values = self._parse_derived_table_values()
2266        if values:
2267            return values
2268
2269        subquery = self._parse_select(table=True)
2270        if subquery:
2271            if not subquery.args.get("pivots"):
2272                subquery.set("pivots", self._parse_pivots())
2273            return subquery
2274
2275        this: exp.Expression = self._parse_table_parts(schema=schema)
2276
2277        if schema:
2278            return self._parse_schema(this=this)
2279
2280        if self.alias_post_tablesample:
2281            table_sample = self._parse_table_sample()
2282
2283        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
2284        if alias:
2285            this.set("alias", alias)
2286
2287        if not this.args.get("pivots"):
2288            this.set("pivots", self._parse_pivots())
2289
2290        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
2291            this.set(
2292                "hints",
2293                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
2294            )
2295            self._match_r_paren()
2296
2297        if not self.alias_post_tablesample:
2298            table_sample = self._parse_table_sample()
2299
2300        if table_sample:
2301            table_sample.set("this", this)
2302            this = table_sample
2303
2304        return this
2305
2306    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2307        if not self._match(TokenType.UNNEST):
2308            return None
2309
2310        expressions = self._parse_wrapped_csv(self._parse_type)
2311        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2312        alias = self._parse_table_alias()
2313
2314        if alias and self.unnest_column_only:
2315            if alias.args.get("columns"):
2316                self.raise_error("Unexpected extra column alias in unnest.")
2317            alias.set("columns", [alias.this])
2318            alias.set("this", None)
2319
2320        offset = None
2321        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2322            self._match(TokenType.ALIAS)
2323            offset = self._parse_id_var() or exp.Identifier(this="offset")
2324
2325        return self.expression(
2326            exp.Unnest,
2327            expressions=expressions,
2328            ordinality=ordinality,
2329            alias=alias,
2330            offset=offset,
2331        )
2332
2333    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2334        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2335        if not is_derived and not self._match(TokenType.VALUES):
2336            return None
2337
2338        expressions = self._parse_csv(self._parse_value)
2339
2340        if is_derived:
2341            self._match_r_paren()
2342
2343        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2344
2345    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2346        if not self._match(TokenType.TABLE_SAMPLE) and not (
2347            as_modifier and self._match_text_seq("USING", "SAMPLE")
2348        ):
2349            return None
2350
2351        bucket_numerator = None
2352        bucket_denominator = None
2353        bucket_field = None
2354        percent = None
2355        rows = None
2356        size = None
2357        seed = None
2358
2359        kind = (
2360            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2361        )
2362        method = self._parse_var(tokens=(TokenType.ROW,))
2363
2364        self._match(TokenType.L_PAREN)
2365
2366        num = self._parse_number()
2367
2368        if self._match_text_seq("BUCKET"):
2369            bucket_numerator = self._parse_number()
2370            self._match_text_seq("OUT", "OF")
2371            bucket_denominator = bucket_denominator = self._parse_number()
2372            self._match(TokenType.ON)
2373            bucket_field = self._parse_field()
2374        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2375            percent = num
2376        elif self._match(TokenType.ROWS):
2377            rows = num
2378        else:
2379            size = num
2380
2381        self._match(TokenType.R_PAREN)
2382
2383        if self._match(TokenType.L_PAREN):
2384            method = self._parse_var()
2385            seed = self._match(TokenType.COMMA) and self._parse_number()
2386            self._match_r_paren()
2387        elif self._match_texts(("SEED", "REPEATABLE")):
2388            seed = self._parse_wrapped(self._parse_number)
2389
2390        return self.expression(
2391            exp.TableSample,
2392            method=method,
2393            bucket_numerator=bucket_numerator,
2394            bucket_denominator=bucket_denominator,
2395            bucket_field=bucket_field,
2396            percent=percent,
2397            rows=rows,
2398            size=size,
2399            seed=seed,
2400            kind=kind,
2401        )
2402
2403    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2404        return list(iter(self._parse_pivot, None))
2405
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, or return None (rewinding) when the
        tokens don't actually form one. For PIVOT, the implicit output column
        names are also synthesized."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by "(" -- not a pivot clause; rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregations, each with an optional alias.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain of PIVOT/UNPIVOT clauses takes an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize output column names: one per (IN value, aggregation name)
            # pair; PREFIXED_PIVOT_COLUMNS controls prefix vs suffix order.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2465
2466    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2467        return [agg.alias for agg in aggregations]
2468
2469    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2470        if not skip_where_token and not self._match(TokenType.WHERE):
2471            return None
2472
2473        return self.expression(
2474            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2475        )
2476
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING SETS,
        ROLLUP, CUBE, and TOTALS until no more grouping constructs follow."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # One bucket per kind of grouping construct, merged into exp.Group below.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # Bare "WITH ROLLUP"/"WITH CUBE" keeps the value True; the
            # ROLLUP(...)/CUBE(...) forms carry an explicit column list instead.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Keep looping only while grouping constructs keep appearing.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2513
2514    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2515        if not self._match(TokenType.GROUPING_SETS):
2516            return None
2517
2518        return self._parse_wrapped_csv(self._parse_grouping_set)
2519
2520    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2521        if self._match(TokenType.L_PAREN):
2522            grouping_set = self._parse_csv(self._parse_column)
2523            self._match_r_paren()
2524            return self.expression(exp.Tuple, expressions=grouping_set)
2525
2526        return self._parse_column()
2527
2528    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2529        if not skip_having_token and not self._match(TokenType.HAVING):
2530            return None
2531        return self.expression(exp.Having, this=self._parse_conjunction())
2532
2533    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2534        if not self._match(TokenType.QUALIFY):
2535            return None
2536        return self.expression(exp.Qualify, this=self._parse_conjunction())
2537
2538    def _parse_order(
2539        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2540    ) -> t.Optional[exp.Expression]:
2541        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2542            return this
2543
2544        return self.expression(
2545            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2546        )
2547
2548    def _parse_sort(
2549        self, exp_class: t.Type[exp.Expression], *texts: str
2550    ) -> t.Optional[exp.Expression]:
2551        if not self._match_text_seq(*texts):
2552            return None
2553        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2554
2555    def _parse_ordered(self) -> exp.Expression:
2556        this = self._parse_conjunction()
2557        self._match(TokenType.ASC)
2558        is_desc = self._match(TokenType.DESC)
2559        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
2560        is_nulls_last = self._match_text_seq("NULLS", "LAST")
2561        desc = is_desc or False
2562        asc = not desc
2563        nulls_first = is_nulls_first or False
2564        explicitly_null_ordered = is_nulls_first or is_nulls_last
2565        if (
2566            not explicitly_null_ordered
2567            and (
2568                (asc and self.null_ordering == "nulls_are_small")
2569                or (desc and self.null_ordering != "nulls_are_small")
2570            )
2571            and self.null_ordering != "nulls_are_last"
2572        ):
2573            nulls_first = True
2574
2575        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2576
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT (or TOP, when `top` is True) clause, or an ANSI FETCH clause.

        Returns the resulting `exp.Limit`/`exp.Fetch` node, or `this` unchanged
        when neither clause is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP only accepts a number; LIMIT accepts a full term expression.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH { FIRST | NEXT } <count> [PERCENT] { ROW | ROWS } [ONLY | WITH TIES]
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2615
2616    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2617        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2618            return this
2619
2620        count = self._parse_number()
2621        self._match_set((TokenType.ROW, TokenType.ROWS))
2622        return self.expression(exp.Offset, this=this, expression=count)
2623
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse trailing row-locking clauses (FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE).

        Multiple lock clauses may follow one another; each becomes an `exp.Lock` node.
        """
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            # Optional list of tables the lock applies to: ... OF tbl1, tbl2
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # Wait policy: True = NOWAIT, False = SKIP LOCKED, expression = WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2655
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operators, chaining them onto `this`."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # NOTE: evaluation order matters -- an explicit DISTINCT wins; otherwise
            # the operation is distinct unless ALL follows (both calls consume tokens).
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
2675
2676    def _parse_expression(self) -> t.Optional[exp.Expression]:
2677        return self._parse_alias(self._parse_conjunction())
2678
2679    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2680        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2681
2682    def _parse_equality(self) -> t.Optional[exp.Expression]:
2683        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2684
2685    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2686        return self._parse_tokens(self._parse_range, self.COMPARISON)
2687
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL/NOTNULL, ...)."""
        this = self._parse_bitwise()
        # NOT here may negate a following range predicate, e.g. NOT BETWEEN / NOT IN.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The range parser backed out; keep the operand as-is.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2714
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Retreats and returns None when no valid operand follows, leaving the IS
        token unconsumed for the caller.
        """
        # Position of the IS token itself, so we can back out entirely.
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match_text_seq("DISTINCT", "FROM"):
            # IS [NOT] DISTINCT FROM is null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2729
    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.Expression:
        """Parse the operand of an IN predicate: UNNEST(...), a parenthesized
        (sub)query or value list, or a bare field reference."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under `query`; otherwise it's a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2747
2748    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2749        low = self._parse_bitwise()
2750        self._match(TokenType.AND)
2751        high = self._parse_bitwise()
2752        return self.expression(exp.Between, this=this, low=low, high=high)
2753
2754    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2755        if not self._match(TokenType.ESCAPE):
2756            return this
2757        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2758
2759    def _parse_interval(self) -> t.Optional[exp.Expression]:
2760        if not self._match(TokenType.INTERVAL):
2761            return None
2762
2763        this = self._parse_primary() or self._parse_term()
2764        unit = self._parse_function() or self._parse_var()
2765
2766        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
2767        # each INTERVAL expression into this canonical form so it's easy to transpile
2768        if this and isinstance(this, exp.Literal):
2769            if this.is_number:
2770                this = exp.Literal.string(this.name)
2771
2772            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
2773            parts = this.name.split()
2774            if not unit and len(parts) <= 2:
2775                this = exp.Literal.string(seq_get(parts, 0))
2776                unit = self.expression(exp.Var, this=seq_get(parts, 1))
2777
2778        return self.expression(exp.Interval, this=this, unit=unit)
2779
    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a left-associative chain of bitwise operators and shifts over terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            # << and >> arrive as two adjacent LT/GT tokens.
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this
2802
2803    def _parse_term(self) -> t.Optional[exp.Expression]:
2804        return self._parse_tokens(self._parse_factor, self.TERM)
2805
2806    def _parse_factor(self) -> t.Optional[exp.Expression]:
2807        return self._parse_tokens(self._parse_unary, self.FACTOR)
2808
2809    def _parse_unary(self) -> t.Optional[exp.Expression]:
2810        if self._match_set(self.UNARY_PARSERS):
2811            return self.UNARY_PARSERS[self._prev.token_type](self)
2812        return self._parse_at_time_zone(self._parse_type())
2813
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like `<type> <literal>` construct, a bare data
        type, or fall through to a column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' becomes CAST('2020-01-01' AS DATE), unless a
                # dialect-specific literal parser is registered for this type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was probably a column
                # reference, so rewind and reparse it as one.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2835
2836    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2837        this = self._parse_type()
2838        if not this:
2839            return None
2840
2841        return self.expression(
2842            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2843        )
2844
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), parameterized and
        timezone-qualified types.

        Args:
            check_func: when True, back out (returning None) if what follows looks
                more like a function call than a type, e.g. `DATE(x)` vs `DATE '...'`.
        """
        index = self._index

        # Teradata-style SYSUDTLIB.<type> prefix.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a valid parameterized type after all; rewind completely.
                self._retreat(index)
                return None

            # `TYPE(args)` might actually be a function call -- resolved further below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array types: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this wasn't a type (presumably an index expression).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<INT>, MAP<STRING, INT>, STRUCT<a INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE qualifiers.
            if self._match_text_seq("WITH", "TIME", "ZONE") or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE")
                or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a zone qualifier was consumed this can no longer be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so treat `TYPE(args)` as a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2958
2959    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2960        this = self._parse_type() or self._parse_id_var()
2961        self._match(TokenType.COLON)
2962        return self._parse_column_def(this)
2963
2964    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2965        if not self._match_text_seq("AT", "TIME", "ZONE"):
2966            return this
2967        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2968
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including trailing column
        operators such as `::` casts, arrow operators, and dotted access."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers over: what was parsed as the column is
                # actually a qualifier for the new field (table.column, db.table.column).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3021
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number like `.5`,
        or a parenthesized expression/query/tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated, e.g. 'a' 'b' -> CONCAT.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers written without a leading zero, e.g. `.5`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3065
3066    def _parse_field(
3067        self,
3068        any_token: bool = False,
3069        tokens: t.Optional[t.Collection[TokenType]] = None,
3070    ) -> t.Optional[exp.Expression]:
3071        return (
3072            self._parse_primary()
3073            or self._parse_function()
3074            or self._parse_id_var(any_token=any_token, tokens=tokens)
3075        )
3076
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder mapping overriding `self.FUNCTIONS`.
            anonymous: when True, skip registered parsers/builders and always
                produce an `exp.Anonymous` node.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows: only keyword-style functions can still match.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved as anonymous calls.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A function call may be followed by an OVER (...) window specification.
        return self._parse_window(this)
3128
3129    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3130        return self._parse_column_def(self._parse_id_var())
3131
3132    def _parse_user_defined_function(
3133        self, kind: t.Optional[TokenType] = None
3134    ) -> t.Optional[exp.Expression]:
3135        this = self._parse_id_var()
3136
3137        while self._match(TokenType.DOT):
3138            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3139
3140        if not self._match(TokenType.L_PAREN):
3141            return this
3142
3143        expressions = self._parse_csv(self._parse_function_parameter)
3144        self._match_r_paren()
3145        return self.expression(
3146            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3147        )
3148
3149    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3150        literal = self._parse_primary()
3151        if literal:
3152            return self.expression(exp.Introducer, this=token.text, expression=literal)
3153
3154        return self.expression(exp.Identifier, this=token.text)
3155
3156    def _parse_national(self, token: Token) -> exp.Expression:
3157        return self.expression(exp.National, this=exp.Literal.string(token.text))
3158
3159    def _parse_session_parameter(self) -> exp.Expression:
3160        kind = None
3161        this = self._parse_id_var() or self._parse_primary()
3162
3163        if this and self._match(TokenType.DOT):
3164            kind = this.name
3165            this = self._parse_var() or self._parse_primary()
3166
3167        return self.expression(exp.SessionParameter, this=this, kind=kind)
3168
    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda like `x -> x + 1` or `(a, b) -> a + b`; when no lambda
        operator follows, fall back to a DISTINCT list or a regular expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as an ordinary argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # Presumably a named-argument style call: the left-hand side is
                    # a parameter name, not a column reference.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3200
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column/constraint list) for `this`.

        If a nested SELECT follows instead, `this` is returned untouched.
        """
        index = self._index

        try:
            # Probe for a subquery; the finally-retreat undoes the probe in all
            # cases (including the successful one, which returns `this` as-is).
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3221
3222    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3223        # column defs are not really columns, they're identifiers
3224        if isinstance(this, exp.Column):
3225            this = this.this
3226        kind = self._parse_types()
3227
3228        if self._match_text_seq("FOR", "ORDINALITY"):
3229            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3230
3231        constraints = []
3232        while True:
3233            constraint = self._parse_column_constraint()
3234            if not constraint:
3235                break
3236            constraints.append(constraint)
3237
3238        if not kind and not constraints:
3239            return this
3240
3241        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3242
3243    def _parse_auto_increment(self) -> exp.Expression:
3244        start = None
3245        increment = None
3246
3247        if self._match(TokenType.L_PAREN, advance=False):
3248            args = self._parse_wrapped_csv(self._parse_bitwise)
3249            start = seq_get(args, 0)
3250            increment = seq_get(args, 1)
3251        elif self._match_text_seq("START"):
3252            start = self._parse_bitwise()
3253            self._match_text_seq("INCREMENT")
3254            increment = self._parse_bitwise()
3255
3256        if start and increment:
3257            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3258
3259        return exp.AutoIncrementColumnConstraint()
3260
3261    def _parse_compress(self) -> exp.Expression:
3262        if self._match(TokenType.L_PAREN, advance=False):
3263            return self.expression(
3264                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3265            )
3266
3267        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3268
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY [(...)] constraints."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Sequence options inside the parentheses.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: GENERATED ... AS (<expr>), a computed column.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this
3303
3304    def _parse_inline(self) -> t.Optional[exp.Expression]:
3305        self._match_text_seq("LENGTH")
3306        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3307
3308    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3309        if self._match_text_seq("NULL"):
3310            return self.expression(exp.NotNullColumnConstraint)
3311        if self._match_text_seq("CASESPECIFIC"):
3312            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3313        return None
3314
3315    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3316        if self._match(TokenType.CONSTRAINT):
3317            this = self._parse_id_var()
3318        else:
3319            this = None
3320
3321        if self._match_texts(self.CONSTRAINT_PARSERS):
3322            return self.expression(
3323                exp.ColumnConstraint,
3324                this=this,
3325                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3326            )
3327
3328        return this
3329
3330    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3331        if not self._match(TokenType.CONSTRAINT):
3332            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3333
3334        this = self._parse_id_var()
3335        expressions = []
3336
3337        while True:
3338            constraint = self._parse_unnamed_constraint() or self._parse_function()
3339            if not constraint:
3340                break
3341            expressions.append(constraint)
3342
3343        return self.expression(exp.Constraint, this=this, expressions=expressions)
3344
3345    def _parse_unnamed_constraint(
3346        self, constraints: t.Optional[t.Collection[str]] = None
3347    ) -> t.Optional[exp.Expression]:
3348        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3349            return None
3350
3351        constraint = self._prev.text.upper()
3352        if constraint not in self.CONSTRAINT_PARSERS:
3353            self.raise_error(f"No parser found for schema constraint {constraint}.")
3354
3355        return self.CONSTRAINT_PARSERS[constraint](self)
3356
3357    def _parse_unique(self) -> exp.Expression:
3358        if not self._match(TokenType.L_PAREN, advance=False):
3359            return self.expression(exp.UniqueColumnConstraint)
3360        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3361
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings, in source order.

        Recognizes ON <event> <action>, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY and MATCH FULL; stops at the first
        unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token right after ON names the event (e.g. DELETE, UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3398
3399    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3400        if match and not self._match(TokenType.REFERENCES):
3401            return None
3402
3403        expressions = None
3404        this = self._parse_id_var()
3405
3406        if self._match(TokenType.L_PAREN, advance=False):
3407            expressions = self._parse_wrapped_id_vars()
3408
3409        options = self._parse_key_constraint_options()
3410        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3411
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY body: column list, REFERENCES clause,
        and any ON DELETE / ON UPDATE actions (stored as kwargs on the node)."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3437
3438    def _parse_primary_key(self) -> exp.Expression:
3439        desc = (
3440            self._match_set((TokenType.ASC, TokenType.DESC))
3441            and self._prev.token_type == TokenType.DESC
3442        )
3443
3444        if not self._match(TokenType.L_PAREN, advance=False):
3445            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3446
3447        expressions = self._parse_wrapped_csv(self._parse_field)
3448        options = self._parse_key_constraint_options()
3449        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3450
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a bracketed suffix: array literals/subscripts with [] and
        DuckDB struct literals with {}; recurses to handle chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading ':' is a slice with no start bound, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices for the dialect's index base.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Chained brackets, e.g. x[1][2].
        return self._parse_bracket(this)
3479
3480    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3481        if self._match(TokenType.COLON):
3482            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3483        return this
3484
3485    def _parse_case(self) -> t.Optional[exp.Expression]:
3486        ifs = []
3487        default = None
3488
3489        expression = self._parse_conjunction()
3490
3491        while self._match(TokenType.WHEN):
3492            this = self._parse_conjunction()
3493            self._match(TokenType.THEN)
3494            then = self._parse_conjunction()
3495            ifs.append(self.expression(exp.If, this=this, true=then))
3496
3497        if self._match(TokenType.ELSE):
3498            default = self._parse_conjunction()
3499
3500        if not self._match(TokenType.END):
3501            self.raise_error("Expected END after CASE", self._prev)
3502
3503        return self._parse_window(
3504            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3505        )
3506
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF in both function form IF(cond, true[, false]) and
        statement form IF cond THEN true [ELSE false] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # Remember where we started so we can back out if this isn't an IF.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3528
3529    def _parse_extract(self) -> exp.Expression:
3530        this = self._parse_function() or self._parse_var() or self._parse_type()
3531
3532        if self._match(TokenType.FROM):
3533            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3534
3535        if not self._match(TokenType.COMMA):
3536            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3537
3538        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3539
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(<expr> AS <type>).

        Args:
            strict: build exp.Cast when True, exp.TryCast otherwise.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form CAST(<expr>, '<type string>').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CAST(x AS CHAR CHARACTER SET cs): the charset replaces the target type.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3560
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, normalizing the dialect
        variants (DISTINCT, trailing ORDER BY, WITHIN GROUP) into GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3589
3590    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3591        to: t.Optional[exp.Expression]
3592        this = self._parse_bitwise()
3593
3594        if self._match(TokenType.USING):
3595            to = self.expression(exp.CharacterSet, this=self._parse_var())
3596        elif self._match(TokenType.COMMA):
3597            to = self._parse_bitwise()
3598        else:
3599            to = None
3600
3601        # Swap the argument order if needed to produce the correct AST
3602        if self.CONVERT_TYPE_FIRST:
3603            this, to = to, this
3604
3605        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3606
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than three arguments means the simple charset-decoding variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; a trailing odd element becomes the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # A literal NULL operand must be tested with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal operand: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3653
3654    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3655        self._match_text_seq("KEY")
3656        key = self._parse_field()
3657        self._match(TokenType.COLON)
3658        self._match_text_seq("VALUE")
3659        value = self._parse_field()
3660        if not key and not value:
3661            return None
3662        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3663
3664    def _parse_json_object(self) -> exp.Expression:
3665        expressions = self._parse_csv(self._parse_json_key_value)
3666
3667        null_handling = None
3668        if self._match_text_seq("NULL", "ON", "NULL"):
3669            null_handling = "NULL ON NULL"
3670        elif self._match_text_seq("ABSENT", "ON", "NULL"):
3671            null_handling = "ABSENT ON NULL"
3672
3673        unique_keys = None
3674        if self._match_text_seq("WITH", "UNIQUE"):
3675            unique_keys = True
3676        elif self._match_text_seq("WITHOUT", "UNIQUE"):
3677            unique_keys = False
3678
3679        self._match_text_seq("KEYS")
3680
3681        return_type = self._match_text_seq("RETURNING") and self._parse_type()
3682        format_json = self._match_text_seq("FORMAT", "JSON")
3683        encoding = self._match_text_seq("ENCODING") and self._parse_var()
3684
3685        return self.expression(
3686            exp.JSONObject,
3687            expressions=expressions,
3688            null_handling=null_handling,
3689            unique_keys=unique_keys,
3690            return_type=return_type,
3691            format_json=format_json,
3692            encoding=encoding,
3693        )
3694
3695    def _parse_logarithm(self) -> exp.Expression:
3696        # Default argument order is base, expression
3697        args = self._parse_csv(self._parse_range)
3698
3699        if len(args) > 1:
3700            if not self.LOG_BASE_FIRST:
3701                args.reverse()
3702            return exp.Log.from_arg_list(args)
3703
3704        return self.expression(
3705            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3706        )
3707
3708    def _parse_match_against(self) -> exp.Expression:
3709        expressions = self._parse_csv(self._parse_column)
3710
3711        self._match_text_seq(")", "AGAINST", "(")
3712
3713        this = self._parse_string()
3714
3715        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3716            modifier = "IN NATURAL LANGUAGE MODE"
3717            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3718                modifier = f"{modifier} WITH QUERY EXPANSION"
3719        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3720            modifier = "IN BOOLEAN MODE"
3721        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3722            modifier = "WITH QUERY EXPANSION"
3723        else:
3724            modifier = None
3725
3726        return self.expression(
3727            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3728        )
3729
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL's OPENJSON(<expr>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: <name> <type> ['<json path>'] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3750
3751    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3752        args = self._parse_csv(self._parse_bitwise)
3753
3754        if self._match(TokenType.IN):
3755            return self.expression(
3756                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3757            )
3758
3759        if haystack_first:
3760            haystack = seq_get(args, 0)
3761            needle = seq_get(args, 1)
3762        else:
3763            needle = seq_get(args, 0)
3764            haystack = seq_get(args, 1)
3765
3766        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3767
3768        self.validate_expression(this, args)
3769
3770        return this
3771
3772    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3773        args = self._parse_csv(self._parse_table)
3774        return exp.JoinHint(this=func_name.upper(), expressions=args)
3775
3776    def _parse_substring(self) -> exp.Expression:
3777        # Postgres supports the form: substring(string [from int] [for int])
3778        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3779
3780        args = self._parse_csv(self._parse_bitwise)
3781
3782        if self._match(TokenType.FROM):
3783            args.append(self._parse_bitwise())
3784            if self._match(TokenType.FOR):
3785                args.append(self._parse_bitwise())
3786
3787        this = exp.Substring.from_arg_list(args)
3788        self.validate_expression(this, args)
3789
3790        return this
3791
3792    def _parse_struct(self) -> exp.Struct:
3793        return exp.Struct.from_arg_list(self._parse_csv(lambda: self._parse_lambda(alias=True)))
3794
3795    def _parse_trim(self) -> exp.Expression:
3796        # https://www.w3resource.com/sql/character-functions/trim.php
3797        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3798
3799        position = None
3800        collation = None
3801
3802        if self._match_texts(self.TRIM_TYPES):
3803            position = self._prev.text.upper()
3804
3805        expression = self._parse_bitwise()
3806        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3807            this = self._parse_bitwise()
3808        else:
3809            this = expression
3810            expression = None
3811
3812        if self._match(TokenType.COLLATE):
3813            collation = self._parse_bitwise()
3814
3815        return self.expression(
3816            exp.Trim,
3817            this=this,
3818            position=position,
3819            expression=expression,
3820            collation=collation,
3821        )
3822
3823    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3824        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3825
3826    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3827        return self._parse_window(self._parse_id_var(), alias=True)
3828
3829    def _parse_respect_or_ignore_nulls(
3830        self, this: t.Optional[exp.Expression]
3831    ) -> t.Optional[exp.Expression]:
3832        if self._match_text_seq("IGNORE", "NULLS"):
3833            return self.expression(exp.IgnoreNulls, this=this)
3834        if self._match_text_seq("RESPECT", "NULLS"):
3835            return self.expression(exp.RespectNulls, this=this)
3836        return this
3837
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the modifiers that may trail a function call — FILTER,
        WITHIN GROUP, IGNORE/RESPECT NULLS and an OVER (...) specification.

        Args:
            this: the expression the window modifiers attach to.
            alias: when True, parse a named window definition (name AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # OVER followed by a bare name references a named window definition.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # A ROWS/RANGE keyword introduces an explicit frame specification.
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3920
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound into a {"value": ..., "side": ...} dict."""
        self._match(TokenType.BETWEEN)

        return {
            # UNBOUNDED / CURRENT ROW stay as strings; anything else is an expression.
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(("PRECEDING", "FOLLOWING")) and self._prev.text,
        }
3932
3933    def _parse_alias(
3934        self, this: t.Optional[exp.Expression], explicit: bool = False
3935    ) -> t.Optional[exp.Expression]:
3936        any_token = self._match(TokenType.ALIAS)
3937
3938        if explicit and not any_token:
3939            return this
3940
3941        if self._match(TokenType.L_PAREN):
3942            aliases = self.expression(
3943                exp.Aliases,
3944                this=this,
3945                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3946            )
3947            self._match_r_paren(aliases)
3948            return aliases
3949
3950        alias = self._parse_id_var(any_token)
3951
3952        if alias:
3953            return self.expression(exp.Alias, this=this, alias=alias)
3954
3955        return this
3956
3957    def _parse_id_var(
3958        self,
3959        any_token: bool = True,
3960        tokens: t.Optional[t.Collection[TokenType]] = None,
3961        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3962    ) -> t.Optional[exp.Expression]:
3963        identifier = self._parse_identifier()
3964
3965        if identifier:
3966            return identifier
3967
3968        prefix = ""
3969
3970        if prefix_tokens:
3971            while self._match_set(prefix_tokens):
3972                prefix += self._prev.text
3973
3974        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3975            quoted = self._prev.token_type == TokenType.STRING
3976            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3977
3978        return None
3979
3980    def _parse_string(self) -> t.Optional[exp.Expression]:
3981        if self._match(TokenType.STRING):
3982            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3983        return self._parse_placeholder()
3984
3985    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3986        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3987
3988    def _parse_number(self) -> t.Optional[exp.Expression]:
3989        if self._match(TokenType.NUMBER):
3990            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3991        return self._parse_placeholder()
3992
3993    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3994        if self._match(TokenType.IDENTIFIER):
3995            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3996        return self._parse_placeholder()
3997
3998    def _parse_var(
3999        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4000    ) -> t.Optional[exp.Expression]:
4001        if (
4002            (any_token and self._advance_any())
4003            or self._match(TokenType.VAR)
4004            or (self._match_set(tokens) if tokens else False)
4005        ):
4006            return self.expression(exp.Var, this=self._prev.text)
4007        return self._parse_placeholder()
4008
4009    def _advance_any(self) -> t.Optional[Token]:
4010        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4011            self._advance()
4012            return self._prev
4013        return None
4014
4015    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4016        return self._parse_var() or self._parse_string()
4017
4018    def _parse_null(self) -> t.Optional[exp.Expression]:
4019        if self._match(TokenType.NULL):
4020            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4021        return None
4022
4023    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4024        if self._match(TokenType.TRUE):
4025            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4026        if self._match(TokenType.FALSE):
4027            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4028        return None
4029
4030    def _parse_star(self) -> t.Optional[exp.Expression]:
4031        if self._match(TokenType.STAR):
4032            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4033        return None
4034
4035    def _parse_parameter(self) -> exp.Expression:
4036        wrapped = self._match(TokenType.L_BRACE)
4037        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4038        self._match(TokenType.R_BRACE)
4039        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4040
4041    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4042        if self._match_set(self.PLACEHOLDER_PARSERS):
4043            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4044            if placeholder:
4045                return placeholder
4046            self._advance(-1)
4047        return None
4048
4049    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4050        if not self._match(TokenType.EXCEPT):
4051            return None
4052        if self._match(TokenType.L_PAREN, advance=False):
4053            return self._parse_wrapped_csv(self._parse_column)
4054        return self._parse_csv(self._parse_column)
4055
4056    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4057        if not self._match(TokenType.REPLACE):
4058            return None
4059        if self._match(TokenType.L_PAREN, advance=False):
4060            return self._parse_wrapped_csv(self._parse_expression)
4061        return self._parse_csv(self._parse_expression)
4062
4063    def _parse_csv(
4064        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4065    ) -> t.List[t.Optional[exp.Expression]]:
4066        parse_result = parse_method()
4067        items = [parse_result] if parse_result is not None else []
4068
4069        while self._match(sep):
4070            self._add_comments(parse_result)
4071            parse_result = parse_method()
4072            if parse_result is not None:
4073                items.append(parse_result)
4074
4075        return items
4076
4077    def _parse_tokens(
4078        self, parse_method: t.Callable, expressions: t.Dict
4079    ) -> t.Optional[exp.Expression]:
4080        this = parse_method()
4081
4082        while self._match_set(expressions):
4083            this = self.expression(
4084                expressions[self._prev.token_type],
4085                this=this,
4086                comments=self._prev_comments,
4087                expression=parse_method(),
4088            )
4089
4090        return this
4091
4092    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4093        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4094
4095    def _parse_wrapped_csv(
4096        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4097    ) -> t.List[t.Optional[exp.Expression]]:
4098        return self._parse_wrapped(
4099            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4100        )
4101
4102    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4103        wrapped = self._match(TokenType.L_PAREN)
4104        if not wrapped and not optional:
4105            self.raise_error("Expecting (")
4106        parse_result = parse_method()
4107        if wrapped:
4108            self._match_r_paren()
4109        return parse_result
4110
4111    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
4112        return self._parse_select() or self._parse_set_operations(
4113            self._parse_expression() if alias else self._parse_conjunction()
4114        )
4115
4116    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4117        return self._parse_set_operations(
4118            self._parse_select(nested=True, parse_subquery_alias=False)
4119        )
4120
4121    def _parse_transaction(self) -> exp.Expression:
4122        this = None
4123        if self._match_texts(self.TRANSACTION_KIND):
4124            this = self._prev.text
4125
4126        self._match_texts({"TRANSACTION", "WORK"})
4127
4128        modes = []
4129        while True:
4130            mode = []
4131            while self._match(TokenType.VAR):
4132                mode.append(self._prev.text)
4133
4134            if mode:
4135                modes.append(" ".join(mode))
4136            if not self._match(TokenType.COMMA):
4137                break
4138
4139        return self.expression(exp.Transaction, this=this, modes=modes)
4140
4141    def _parse_commit_or_rollback(self) -> exp.Expression:
4142        chain = None
4143        savepoint = None
4144        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4145
4146        self._match_texts({"TRANSACTION", "WORK"})
4147
4148        if self._match_text_seq("TO"):
4149            self._match_text_seq("SAVEPOINT")
4150            savepoint = self._parse_id_var()
4151
4152        if self._match(TokenType.AND):
4153            chain = not self._match_text_seq("NO")
4154            self._match_text_seq("CHAIN")
4155
4156        if is_rollback:
4157            return self.expression(exp.Rollback, savepoint=savepoint)
4158        return self.expression(exp.Commit, chain=chain)
4159
4160    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4161        if not self._match_text_seq("ADD"):
4162            return None
4163
4164        self._match(TokenType.COLUMN)
4165        exists_column = self._parse_exists(not_=True)
4166        expression = self._parse_column_def(self._parse_field(any_token=True))
4167
4168        if expression:
4169            expression.set("exists", exists_column)
4170
4171            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4172            if self._match_texts(("FIRST", "AFTER")):
4173                position = self._prev.text
4174                column_position = self.expression(
4175                    exp.ColumnPosition, this=self._parse_column(), position=position
4176                )
4177                expression.set("position", column_position)
4178
4179        return expression
4180
4181    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4182        drop = self._match(TokenType.DROP) and self._parse_drop()
4183        if drop and not isinstance(drop, exp.Command):
4184            drop.set("kind", drop.args.get("kind", "COLUMN"))
4185        return drop
4186
4187    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4188    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4189        return self.expression(
4190            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4191        )
4192
4193    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
4194        this = None
4195        kind = self._prev.token_type
4196
4197        if kind == TokenType.CONSTRAINT:
4198            this = self._parse_id_var()
4199
4200            if self._match_text_seq("CHECK"):
4201                expression = self._parse_wrapped(self._parse_conjunction)
4202                enforced = self._match_text_seq("ENFORCED")
4203
4204                return self.expression(
4205                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4206                )
4207
4208        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4209            expression = self._parse_foreign_key()
4210        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4211            expression = self._parse_primary_key()
4212        else:
4213            expression = None
4214
4215        return self.expression(exp.AddConstraint, this=this, expression=expression)
4216
4217    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4218        index = self._index - 1
4219
4220        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4221            return self._parse_csv(self._parse_add_constraint)
4222
4223        self._retreat(index)
4224        return self._parse_csv(self._parse_add_column)
4225
4226    def _parse_alter_table_alter(self) -> exp.Expression:
4227        self._match(TokenType.COLUMN)
4228        column = self._parse_field(any_token=True)
4229
4230        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4231            return self.expression(exp.AlterColumn, this=column, drop=True)
4232        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4233            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4234
4235        self._match_text_seq("SET", "DATA")
4236        return self.expression(
4237            exp.AlterColumn,
4238            this=column,
4239            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4240            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4241            using=self._match(TokenType.USING) and self._parse_conjunction(),
4242        )
4243
4244    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4245        index = self._index - 1
4246
4247        partition_exists = self._parse_exists()
4248        if self._match(TokenType.PARTITION, advance=False):
4249            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4250
4251        self._retreat(index)
4252        return self._parse_csv(self._parse_drop_column)
4253
4254    def _parse_alter_table_rename(self) -> exp.Expression:
4255        self._match_text_seq("TO")
4256        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4257
4258    def _parse_alter(self) -> t.Optional[exp.Expression]:
4259        start = self._prev
4260
4261        if not self._match(TokenType.TABLE):
4262            return self._parse_as_command(start)
4263
4264        exists = self._parse_exists()
4265        this = self._parse_table(schema=True)
4266
4267        if self._next:
4268            self._advance()
4269        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4270
4271        if parser:
4272            actions = ensure_list(parser(self))
4273
4274            if not self._curr:
4275                return self.expression(
4276                    exp.AlterTable,
4277                    this=this,
4278                    exists=exists,
4279                    actions=actions,
4280                )
4281        return self._parse_as_command(start)
4282
4283    def _parse_merge(self) -> exp.Expression:
4284        self._match(TokenType.INTO)
4285        target = self._parse_table()
4286
4287        self._match(TokenType.USING)
4288        using = self._parse_table()
4289
4290        self._match(TokenType.ON)
4291        on = self._parse_conjunction()
4292
4293        whens = []
4294        while self._match(TokenType.WHEN):
4295            matched = not self._match(TokenType.NOT)
4296            self._match_text_seq("MATCHED")
4297            source = (
4298                False
4299                if self._match_text_seq("BY", "TARGET")
4300                else self._match_text_seq("BY", "SOURCE")
4301            )
4302            condition = self._parse_conjunction() if self._match(TokenType.AND) else None
4303
4304            self._match(TokenType.THEN)
4305
4306            if self._match(TokenType.INSERT):
4307                _this = self._parse_star()
4308                if _this:
4309                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
4310                else:
4311                    then = self.expression(
4312                        exp.Insert,
4313                        this=self._parse_value(),
4314                        expression=self._match(TokenType.VALUES) and self._parse_value(),
4315                    )
4316            elif self._match(TokenType.UPDATE):
4317                expressions = self._parse_star()
4318                if expressions:
4319                    then = self.expression(exp.Update, expressions=expressions)
4320                else:
4321                    then = self.expression(
4322                        exp.Update,
4323                        expressions=self._match(TokenType.SET)
4324                        and self._parse_csv(self._parse_equality),
4325                    )
4326            elif self._match(TokenType.DELETE):
4327                then = self.expression(exp.Var, this=self._prev.text)
4328            else:
4329                then = None
4330
4331            whens.append(
4332                self.expression(
4333                    exp.When,
4334                    matched=matched,
4335                    source=source,
4336                    condition=condition,
4337                    then=then,
4338                )
4339            )
4340
4341        return self.expression(
4342            exp.Merge,
4343            this=target,
4344            using=using,
4345            on=on,
4346            expressions=whens,
4347        )
4348
4349    def _parse_show(self) -> t.Optional[exp.Expression]:
4350        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4351        if parser:
4352            return parser(self)
4353        self._advance()
4354        return self.expression(exp.Show, this=self._prev.text.upper())
4355
4356    def _parse_set_item_assignment(
4357        self, kind: t.Optional[str] = None
4358    ) -> t.Optional[exp.Expression]:
4359        index = self._index
4360
4361        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4362            return self._parse_set_transaction(global_=kind == "GLOBAL")
4363
4364        left = self._parse_primary() or self._parse_id_var()
4365
4366        if not self._match_texts(("=", "TO")):
4367            self._retreat(index)
4368            return None
4369
4370        right = self._parse_statement() or self._parse_id_var()
4371        this = self.expression(
4372            exp.EQ,
4373            this=left,
4374            expression=right,
4375        )
4376
4377        return self.expression(
4378            exp.SetItem,
4379            this=this,
4380            kind=kind,
4381        )
4382
4383    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4384        self._match_text_seq("TRANSACTION")
4385        characteristics = self._parse_csv(
4386            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4387        )
4388        return self.expression(
4389            exp.SetItem,
4390            expressions=characteristics,
4391            kind="TRANSACTION",
4392            **{"global": global_},  # type: ignore
4393        )
4394
4395    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4396        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4397        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4398
4399    def _parse_set(self) -> exp.Expression:
4400        index = self._index
4401        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4402
4403        if self._curr:
4404            self._retreat(index)
4405            return self._parse_as_command(self._prev)
4406
4407        return set_
4408
4409    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4410        for option in options:
4411            if self._match_text_seq(*option.split(" ")):
4412                return exp.Var(this=option)
4413        return None
4414
4415    def _parse_as_command(self, start: Token) -> exp.Command:
4416        while self._curr:
4417            self._advance()
4418        text = self._find_sql(start, self._prev)
4419        size = len(start.text)
4420        return exp.Command(this=text[:size], expression=text[size:])
4421
4422    def _find_parser(
4423        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4424    ) -> t.Optional[t.Callable]:
4425        if not self._curr:
4426            return None
4427
4428        index = self._index
4429        this = []
4430        while True:
4431            # The current token might be multiple words
4432            curr = self._curr.text.upper()
4433            key = curr.split(" ")
4434            this.append(curr)
4435            self._advance()
4436            result, trie = in_trie(trie, key)
4437            if result == 0:
4438                break
4439            if result == 2:
4440                subparser = parsers[" ".join(this)]
4441                return subparser
4442        self._retreat(index)
4443        return None
4444
4445    def _match(self, token_type, advance=True, expression=None):
4446        if not self._curr:
4447            return None
4448
4449        if self._curr.token_type == token_type:
4450            if advance:
4451                self._advance()
4452            self._add_comments(expression)
4453            return True
4454
4455        return None
4456
4457    def _match_set(self, types, advance=True):
4458        if not self._curr:
4459            return None
4460
4461        if self._curr.token_type in types:
4462            if advance:
4463                self._advance()
4464            return True
4465
4466        return None
4467
4468    def _match_pair(self, token_type_a, token_type_b, advance=True):
4469        if not self._curr or not self._next:
4470            return None
4471
4472        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4473            if advance:
4474                self._advance(2)
4475            return True
4476
4477        return None
4478
4479    def _match_l_paren(self, expression=None):
4480        if not self._match(TokenType.L_PAREN, expression=expression):
4481            self.raise_error("Expecting (")
4482
4483    def _match_r_paren(self, expression=None):
4484        if not self._match(TokenType.R_PAREN, expression=expression):
4485            self.raise_error("Expecting )")
4486
4487    def _match_texts(self, texts, advance=True):
4488        if self._curr and self._curr.text.upper() in texts:
4489            if advance:
4490                self._advance()
4491            return True
4492        return False
4493
4494    def _match_text_seq(self, *texts, advance=True):
4495        index = self._index
4496        for text in texts:
4497            if self._curr and self._curr.text.upper() == text:
4498                self._advance()
4499            else:
4500                self._retreat(index)
4501                return False
4502
4503        if not advance:
4504            self._retreat(index)
4505
4506        return True
4507
4508    def _replace_columns_with_dots(self, this):
4509        if isinstance(this, exp.Dot):
4510            exp.replace_children(this, self._replace_columns_with_dots)
4511        elif isinstance(this, exp.Column):
4512            exp.replace_children(this, self._replace_columns_with_dots)
4513            table = this.args.get("table")
4514            this = (
4515                self.expression(exp.Dot, this=table, expression=this.this)
4516                if table
4517                else self.expression(exp.Var, this=this.name)
4518            )
4519        elif isinstance(this, exp.Identifier):
4520            this = self.expression(exp.Var, this=this.name)
4521        return this
4522
4523    def _replace_lambda(self, node, lambda_variables):
4524        for column in node.find_all(exp.Column):
4525            if column.parts[0].name in lambda_variables:
4526                dot_or_id = column.to_dot() if column.table else column.this
4527                parent = column.parent
4528
4529                while isinstance(parent, exp.Dot):
4530                    if not isinstance(parent.parent, exp.Dot):
4531                        parent.replace(dot_or_id)
4532                        break
4533                    parent = parent.parent
4534                else:
4535                    if column is node:
4536                        node = dot_or_id
4537                    else:
4538                        column.replace(dot_or_id)
4539        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. whether the first element is ARRAY[0] or ARRAY[1]. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
775    def __init__(
776        self,
777        error_level: t.Optional[ErrorLevel] = None,
778        error_message_context: int = 100,
779        index_offset: int = 0,
780        unnest_column_only: bool = False,
781        alias_post_tablesample: bool = False,
782        max_errors: int = 3,
783        null_ordering: t.Optional[str] = None,
784    ):
785        self.error_level = error_level or ErrorLevel.IMMEDIATE
786        self.error_message_context = error_message_context
787        self.index_offset = index_offset
788        self.unnest_column_only = unnest_column_only
789        self.alias_post_tablesample = alias_post_tablesample
790        self.max_errors = max_errors
791        self.null_ordering = null_ordering
792        self.reset()
def reset(self):
794    def reset(self):
795        self.sql = ""
796        self.errors = []
797        self._tokens = []
798        self._index = 0
799        self._curr = None
800        self._next = None
801        self._prev = None
802        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
804    def parse(
805        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
806    ) -> t.List[t.Optional[exp.Expression]]:
807        """
808        Parses a list of tokens and returns a list of syntax trees, one tree
809        per parsed SQL statement.
810
811        Args:
812            raw_tokens: the list of tokens.
813            sql: the original SQL string, used to produce helpful debug messages.
814
815        Returns:
816            The list of syntax trees.
817        """
818        return self._parse(
819            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
820        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
822    def parse_into(
823        self,
824        expression_types: exp.IntoType,
825        raw_tokens: t.List[Token],
826        sql: t.Optional[str] = None,
827    ) -> t.List[t.Optional[exp.Expression]]:
828        """
829        Parses a list of tokens into a given Expression type. If a collection of Expression
830        types is given instead, this method will try to parse the token list into each one
831        of them, stopping at the first for which the parsing succeeds.
832
833        Args:
834            expression_types: the expression type(s) to try and parse the token list into.
835            raw_tokens: the list of tokens.
836            sql: the original SQL string, used to produce helpful debug messages.
837
838        Returns:
839            The target Expression.
840        """
841        errors = []
842        for expression_type in ensure_collection(expression_types):
843            parser = self.EXPRESSION_PARSERS.get(expression_type)
844            if not parser:
845                raise TypeError(f"No parser registered for {expression_type}")
846            try:
847                return self._parse(parser, raw_tokens, sql)
848            except ParseError as e:
849                e.errors[0]["into_expression"] = expression_type
850                errors.append(e)
851        raise ParseError(
852            f"Failed to parse into {expression_types}",
853            errors=merge_errors(errors),
854        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
890    def check_errors(self) -> None:
891        """
892        Logs or raises any found errors, depending on the chosen error level setting.
893        """
894        if self.error_level == ErrorLevel.WARN:
895            for error in self.errors:
896                logger.error(str(error))
897        elif self.error_level == ErrorLevel.RAISE and self.errors:
898            raise ParseError(
899                concat_messages(self.errors, self.max_errors),
900                errors=merge_errors(self.errors),
901            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
903    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
904        """
905        Appends an error in the list of recorded errors or raises it, depending on the chosen
906        error level setting.
907        """
908        token = token or self._curr or self._prev or Token.string("")
909        start = token.start
910        end = token.end + 1
911        start_context = self.sql[max(start - self.error_message_context, 0) : start]
912        highlight = self.sql[start:end]
913        end_context = self.sql[end : end + self.error_message_context]
914
915        error = ParseError.new(
916            f"{message}. Line {token.line}, Col: {token.col}.\n"
917            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
918            description=message,
919            line=token.line,
920            col=token.col,
921            start_context=start_context,
922            highlight=highlight,
923            end_context=end_context,
924        )
925
926        if self.error_level == ErrorLevel.IMMEDIATE:
927            raise error
928
929        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
931    def expression(
932        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
933    ) -> E:
934        """
935        Creates a new, validated Expression.
936
937        Args:
938            exp_class: the expression class to instantiate.
939            comments: an optional list of comments to attach to the expression.
940            kwargs: the arguments to set for the expression along with their respective values.
941
942        Returns:
943            The target expression.
944        """
945        instance = exp_class(**kwargs)
946        instance.add_comments(comments) if comments else self._add_comments(instance)
947        self.validate_expression(instance)
948        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
955    def validate_expression(
956        self, expression: exp.Expression, args: t.Optional[t.List] = None
957    ) -> None:
958        """
959        Validates an already instantiated expression, making sure that all its mandatory arguments
960        are set.
961
962        Args:
963            expression: the expression to validate.
964            args: an optional list of items that was used to instantiate the expression, if it's a Func.
965        """
966        if self.error_level == ErrorLevel.IGNORE:
967            return
968
969        for error_message in expression.error_messages(args):
970            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.