sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns

# Expression types that share the DATE_ADD-style delta generation (see _date_delta_sql).
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Render a LATERAL EXPLODE as Presto's CROSS JOIN UNNEST; other laterals pass through."""
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    # POSEXPLODE also exposes the element ordinal (UNNEST ... WITH ORDINALITY)
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP via REGEXP_REPLACE: uppercase the first letter of each word."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT, using a reversing comparator for descending order."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a schema; schemas nested in a property become an ARRAY of column names."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        # Pull column definitions out of property-level schemas into this one
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Downgrade an exact QUANTILE to APPROX_PERCENTILE, warning that it is approximate."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time parsing as DATE_PARSE(value, format)."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-datestring to DATE, parsing first when a custom format is given."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    # Double cast: TIMESTAMP first so both date and datetime strings are accepted
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TS_OR_DS_ADD as DATE_ADD(unit, amount, value) with appropriate casts."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TS_OR_DS_DIFF as DATE_DIFF(unit, start, end) over TIMESTAMP-cast operands."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE, distinguishing the 3- and 4-argument (weighted) forms."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME; 3 args carry an hours/minutes offset, 2 args a time zone."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UNIX_TO_TIME via FROM_UNIXTIME, rescaling sub-second epochs to seconds."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str:
    """Render JSON_EXTRACT, or dot-notation ROW access for VARIANT-style extracts."""
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue
        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            # Quote keys that aren't valid bare identifiers
            key = f'"{key}"'
        segments.append(f".{key}")

    expr = "".join(segments)

    return f"{this}{expr}"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Wrap *expression* in a BIGINT cast unless it is already annotated as an integer."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse Presto's Teradata-style TO_CHAR into a TimeToStr expression."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for DATE_ADD-style functions; negates the interval for subtraction."""
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


class Presto(Dialect):
    # Arrays are 1-indexed in Presto/Trino
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    TIME_MAPPING = {
        **MySQL.TIME_MAPPING,
        "%W": "%A",
    }

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        # Presto supports U&'...' Unicode string literals
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's argument order is (unit, amount, value); sqlglot stores value in `this`
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            # ELEMENT_AT is a NULL-safe, 1-indexed subscript
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DATE_SUB is expressed as DATE_ADD with a negated interval
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: _jsonextract_sql,
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        # Identifiers that must be quoted when used as names in generated SQL
        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 as LOWER(TO_HEX(MD5(...))), UTF-8 encoding text inputs first."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                # Presto's MD5 takes VARBINARY, so text must be encoded first
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render NULL-safe subscripts as ELEMENT_AT, adjusting for Presto's 1-indexing."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct as CAST(ROW(...) AS ROW(...)) when all field types are known."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            # Without a complete schema, fall back to a bare ROW(...) call
            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            # Presto has no WEEK interval unit; rewrite N WEEK as (N * INTERVAL '7' DAY)
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render BEGIN-style statements as START TRANSACTION [modes]."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto expects OFFSET before LIMIT
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    # Drop the alias and unqualify columns that referenced it
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
234class Presto(Dialect): 235 INDEX_OFFSET = 1 236 NULL_ORDERING = "nulls_are_last" 237 TIME_FORMAT = MySQL.TIME_FORMAT 238 STRICT_STRING_CONCAT = True 239 SUPPORTS_SEMI_ANTI_JOIN = False 240 TYPED_DIVISION = True 241 TABLESAMPLE_SIZE_IS_PERCENT = True 242 LOG_BASE_FIRST: t.Optional[bool] = None 243 244 TIME_MAPPING = { 245 **MySQL.TIME_MAPPING, 246 "%W": "%A", 247 } 248 249 # https://github.com/trinodb/trino/issues/17 250 # https://github.com/trinodb/trino/issues/12289 251 # https://github.com/prestodb/presto/issues/2863 252 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 253 254 # The result of certain math functions in Presto/Trino is of type 255 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 256 ANNOTATORS = { 257 **Dialect.ANNOTATORS, 258 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 259 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 260 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 261 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 262 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 263 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 264 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 265 if e.this 266 else self._set_type(e, exp.DataType.Type.DOUBLE), 267 } 268 269 class Tokenizer(tokens.Tokenizer): 270 UNICODE_STRINGS = [ 271 (prefix + q, q) 272 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 273 for prefix in ("U&", "u&") 274 ] 275 276 KEYWORDS = { 277 **tokens.Tokenizer.KEYWORDS, 278 "START": TokenType.BEGIN, 279 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 280 "ROW": TokenType.STRUCT, 281 "IPADDRESS": TokenType.IPADDRESS, 282 "IPPREFIX": TokenType.IPPREFIX, 283 "TDIGEST": TokenType.TDIGEST, 284 "HYPERLOGLOG": TokenType.HLLSKETCH, 285 } 286 KEYWORDS.pop("/*+") 287 KEYWORDS.pop("QUALIFY") 288 289 class Parser(parser.Parser): 290 VALUES_FOLLOWED_BY_PAREN = False 291 292 FUNCTIONS = { 293 
**parser.Parser.FUNCTIONS, 294 "ARBITRARY": exp.AnyValue.from_arg_list, 295 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 296 "APPROX_PERCENTILE": _build_approx_percentile, 297 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 298 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 299 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 300 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 301 "CARDINALITY": exp.ArraySize.from_arg_list, 302 "CONTAINS": exp.ArrayContains.from_arg_list, 303 "DATE_ADD": lambda args: exp.DateAdd( 304 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 305 ), 306 "DATE_DIFF": lambda args: exp.DateDiff( 307 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 308 ), 309 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 310 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 311 "DATE_TRUNC": date_trunc_to_time, 312 "ELEMENT_AT": lambda args: exp.Bracket( 313 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 314 ), 315 "FROM_HEX": exp.Unhex.from_arg_list, 316 "FROM_UNIXTIME": _build_from_unixtime, 317 "FROM_UTF8": lambda args: exp.Decode( 318 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 319 ), 320 "NOW": exp.CurrentTimestamp.from_arg_list, 321 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 322 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 323 ), 324 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 325 this=seq_get(args, 0), 326 expression=seq_get(args, 1), 327 replacement=seq_get(args, 2) or exp.Literal.string(""), 328 ), 329 "ROW": exp.Struct.from_arg_list, 330 "SEQUENCE": exp.GenerateSeries.from_arg_list, 331 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 332 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 333 "STRPOS": lambda args: exp.StrPosition( 334 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 335 ), 336 "TO_CHAR": 
_build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Presto's TRIM is parsed as a regular function call, not with the
    # standard-SQL TRIM([LEADING|TRAILING|BOTH] ... FROM ...) grammar.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")

class Generator(generator.Generator):
    # Generator settings that adapt sqlglot's default SQL output to Presto.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True

    # Properties Presto cannot express in DDL are marked UNSUPPORTED.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # sqlglot data types -> Presto type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Expression-node -> SQL-generation overrides for Presto.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # Presto's DATE_DIFF argument order is (unit, start, end).
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Identifiers matching these are quoted when generating Presto SQL.
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text args as UTF-8."""
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            # Presto's MD5 takes VARBINARY, so textual input must be encoded first.
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME with a TRY/PARSE_DATETIME fallback."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render "safe" subscripts as ELEMENT_AT, which returns NULL out of range."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        # ELEMENT_AT is 1-based; shift only if the source wasn't.
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as CAST(ROW(...) AS ROW(...)) when fully typed."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Fall back to a bare ROW(...) unless every field has a known type.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite WEEK intervals as a multiple of 7 days."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Emit START TRANSACTION, appending any transaction modes."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Emit OFFSET before the LIMIT/FETCH clause.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`.
Whether the behavior of `a / b` depends on the types of `a` and `b`.
`False` means `a / b` is always float division.
`True` means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function.
Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Associates this dialect's time formats with their equivalent Python strftime
formats.
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Presto-specific tokenizer settings."""

    # Presto supports Unicode string literals: U&'...' / u&'...'.
    UNICODE_STRINGS = [
        (prefix + q, q)
        for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }
    # Presto has no hint comments and no QUALIFY keyword.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto-specific parser settings and function mappings."""

    VALUES_FOLLOWED_BY_PAREN = False

    # Presto function names -> sqlglot expression builders.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date).
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is 1-based and NULL-safe, hence offset=1, safe=True.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # TRIM is parsed as a plain function call in Presto.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    # Generator settings that adapt sqlglot's default SQL output to Presto.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True

    # Properties Presto cannot express in DDL are marked UNSUPPORTED.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # sqlglot data types -> Presto type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Expression-node -> SQL-generation overrides for Presto.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # Presto's DATE_DIFF argument order is (unit, start, end).
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Identifiers matching these are quoted when generating Presto SQL.
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text args as UTF-8."""
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            # Presto's MD5 takes VARBINARY, so textual input must be encoded first.
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME with a TRY/PARSE_DATETIME fallback."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render "safe" subscripts as ELEMENT_AT, which returns NULL out of range."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        # ELEMENT_AT is 1-based; shift only if the source wasn't.
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal as CAST(ROW(...) AS ROW(...)) when fully typed."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Fall back to a bare ROW(...) unless every field has a known type.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite WEEK intervals as a multiple of 7 days."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Emit START TRANSACTION, appending any transaction modes."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Emit OFFSET before the LIMIT/FETCH clause.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Generate LOWER(TO_HEX(MD5(...))), UTF-8-encoding textual arguments first."""
    arg = expression.this

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Generate TO_UNIXTIME, parsing the argument into a TIMESTAMP first."""
    # TO_UNIXTIME needs a TIMESTAMP. DATE_PARSE can build one but fails on
    # timezone-bearing input, so that attempt is wrapped in TRY and backed up
    # by PARSE_DATETIME, whose Joda-style patterns match Hive's mapping:
    # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP) if arg.is_string else arg

    no_tz_attempt = self.func("DATE_PARSE", as_text, self.format_time(expression))

    reformatted = self.func("DATE_FORMAT", as_timestamp, self.format_time(expression))
    tz_fallback = self.func(
        "PARSE_DATETIME",
        reformatted,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    parsed = self.func("COALESCE", self.func("TRY", no_tz_attempt), tz_fallback)
    return self.func("TO_UNIXTIME", parsed)
602 def bracket_sql(self, expression: exp.Bracket) -> str: 603 if expression.args.get("safe"): 604 return self.func( 605 "ELEMENT_AT", 606 expression.this, 607 seq_get( 608 apply_index_offset( 609 expression.this, 610 expression.expressions, 611 1 - expression.args.get("offset", 0), 612 ), 613 0, 614 ), 615 ) 616 return super().bracket_sql(expression)
618 def struct_sql(self, expression: exp.Struct) -> str: 619 from sqlglot.optimizer.annotate_types import annotate_types 620 621 expression = annotate_types(expression) 622 values: t.List[str] = [] 623 schema: t.List[str] = [] 624 unknown_type = False 625 626 for e in expression.expressions: 627 if isinstance(e, exp.PropertyEQ): 628 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 629 unknown_type = True 630 else: 631 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 632 values.append(self.sql(e, "expression")) 633 else: 634 values.append(self.sql(e)) 635 636 size = len(expression.expressions) 637 638 if not size or len(schema) != size: 639 if unknown_type: 640 self.unsupported( 641 "Cannot convert untyped key-value definitions (try annotate_types)." 642 ) 643 return self.func("ROW", *values) 644 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
664 def create_sql(self, expression: exp.Create) -> str: 665 """ 666 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 667 so we need to remove them 668 """ 669 kind = expression.args["kind"] 670 schema = expression.this 671 if kind == "VIEW" and schema.expressions: 672 expression.this.set("expressions", None) 673 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
675 def delete_sql(self, expression: exp.Delete) -> str: 676 """ 677 Presto only supports DELETE FROM for a single table without an alias, so we need 678 to remove the unnecessary parts. If the original DELETE statement contains more 679 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 680 """ 681 tables = expression.args.get("tables") or [expression.this] 682 if len(tables) > 1: 683 return super().delete_sql(expression) 684 685 table = tables[0] 686 expression.set("this", table) 687 expression.set("tables", None) 688 689 if isinstance(table, exp.Table): 690 table_alias = table.args.get("alias") 691 if table_alias: 692 table_alias.pop() 693 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 694 695 return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql