"""Presto dialect for sqlglot: tokenizer, parser and generator overrides.

Note: this reconstruction also removes an accidental verbatim duplicate of the
``Presto`` class that followed the first definition (it would have silently
shadowed the original).
"""

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Render a lateral EXPLODE/POSEXPLODE as a CROSS JOIN UNNEST."""
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP via REGEXP_REPLACE with a lambda that upcases each word's head."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT, with a reversing comparator for DESC."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a schema; inside a property it becomes an ARRAY of column names."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Fall back to APPROX_PERCENTILE, warning that exact quantiles are unsupported."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time conversions as DATE_PARSE with the mapped format."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast to DATE, going through DATE_PARSE when a non-default format is given."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD(unit, amount, value) with casts applied."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF over TIMESTAMP-cast operands."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Build ApproxQuantile from APPROX_PERCENTILE args (optional weight/accuracy)."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Build UnixToTime from FROM_UNIXTIME args (hours/minutes or zone variants)."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
    """Wrap a GenerateSeries table source in UNNEST, preserving its alias."""
    if isinstance(expression, exp.Table):
        if isinstance(expression.this, exp.GenerateSeries):
            unnest = exp.Unnest(expressions=[expression.this])

            if expression.alias:
                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
            return unnest
    return expression


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime as FROM_UNIXTIME, rescaling non-second epochs to seconds."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(expression: exp.Expression) -> exp.Expression:
    """Cast a non-integer-typed expression to BIGINT, annotating types first if needed."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Build TimeToStr from TO_CHAR args using Teradata's format mapping."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


class Presto(Dialect):
    """The Presto dialect: tokenizer, parser and generator customizations."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    TIME_MAPPING = MySQL.TIME_MAPPING
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    class Tokenizer(tokens.Tokenizer):
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }

        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_HEX": exp.Hex.from_arg_list,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxDistinct: lambda self, e: self.func(
                "APPROX_DISTINCT", e.this, e.args.get("accuracy")
            ),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression),
                e.this,
            ),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression * -1),
                e.this,
            ),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                value_as_text,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render safe bracket access as ELEMENT_AT, adjusting for index offset."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct as CAST(ROW(...) AS ROW(...)) when all field types are known."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Expand WEEK intervals into a multiple of INTERVAL '7' DAY."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render a transaction start as START TRANSACTION with optional modes."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            """Render GenerateSeries as SEQUENCE, aligning start/end timestamp casts."""
            start = expression.args["start"]
            end = expression.args["end"]
            step = expression.args.get("step")

            if isinstance(start, exp.Cast):
                target_type = start.to
            elif isinstance(end, exp.Cast):
                target_type = end.to
            else:
                target_type = None

            if target_type and target_type.is_type("timestamp"):
                # SEQUENCE needs both bounds to be the same type; mirror the explicit cast.
                if target_type is start.to:
                    end = exp.cast(end, target_type)
                else:
                    start = exp.cast(start, target_type)

            return self.func("SEQUENCE", start, end, step)

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            """Emit OFFSET before LIMIT, as Presto requires."""
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime formats.
Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division. True means a / b is integer division if both a and b are integers.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
212 class Tokenizer(tokens.Tokenizer): 213 UNICODE_STRINGS = [ 214 (prefix + q, q) 215 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 216 for prefix in ("U&", "u&") 217 ] 218 219 KEYWORDS = { 220 **tokens.Tokenizer.KEYWORDS, 221 "START": TokenType.BEGIN, 222 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 223 "ROW": TokenType.STRUCT, 224 "IPADDRESS": TokenType.IPADDRESS, 225 "IPPREFIX": TokenType.IPPREFIX, 226 "TDIGEST": TokenType.TDIGEST, 227 "HYPERLOGLOG": TokenType.HLLSKETCH, 228 } 229 230 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
232 class Parser(parser.Parser): 233 VALUES_FOLLOWED_BY_PAREN = False 234 235 FUNCTIONS = { 236 **parser.Parser.FUNCTIONS, 237 "ARBITRARY": exp.AnyValue.from_arg_list, 238 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 239 "APPROX_PERCENTILE": _build_approx_percentile, 240 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 241 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 242 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 243 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 244 "CARDINALITY": exp.ArraySize.from_arg_list, 245 "CONTAINS": exp.ArrayContains.from_arg_list, 246 "DATE_ADD": lambda args: exp.DateAdd( 247 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 248 ), 249 "DATE_DIFF": lambda args: exp.DateDiff( 250 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 251 ), 252 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 253 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 254 "DATE_TRUNC": date_trunc_to_time, 255 "ELEMENT_AT": lambda args: exp.Bracket( 256 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 257 ), 258 "FROM_HEX": exp.Unhex.from_arg_list, 259 "FROM_UNIXTIME": _build_from_unixtime, 260 "FROM_UTF8": lambda args: exp.Decode( 261 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 262 ), 263 "NOW": exp.CurrentTimestamp.from_arg_list, 264 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 265 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 266 ), 267 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 268 this=seq_get(args, 0), 269 expression=seq_get(args, 1), 270 replacement=seq_get(args, 2) or exp.Literal.string(""), 271 ), 272 "ROW": exp.Struct.from_arg_list, 273 "SEQUENCE": exp.GenerateSeries.from_arg_list, 274 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 275 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 276 "STRPOS": lambda args: exp.StrPosition( 277 
this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 278 ), 279 "TO_CHAR": _build_to_char, 280 "TO_HEX": exp.Hex.from_arg_list, 281 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 282 "TO_UTF8": lambda args: exp.Encode( 283 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 284 ), 285 } 286 287 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 288 FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
290 class Generator(generator.Generator): 291 INTERVAL_ALLOWS_PLURAL_FORM = False 292 JOIN_HINTS = False 293 TABLE_HINTS = False 294 QUERY_HINTS = False 295 IS_BOOL_ALLOWED = False 296 TZ_TO_WITH_TIME_ZONE = True 297 NVL2_SUPPORTED = False 298 STRUCT_DELIMITER = ("(", ")") 299 LIMIT_ONLY_LITERALS = True 300 SUPPORTS_SINGLE_ARG_CONCAT = False 301 LIKE_PROPERTY_INSIDE_SCHEMA = True 302 MULTI_ARG_DISTINCT = False 303 SUPPORTS_TO_NUMBER = False 304 305 PROPERTIES_LOCATION = { 306 **generator.Generator.PROPERTIES_LOCATION, 307 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 308 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 309 } 310 311 TYPE_MAPPING = { 312 **generator.Generator.TYPE_MAPPING, 313 exp.DataType.Type.INT: "INTEGER", 314 exp.DataType.Type.FLOAT: "REAL", 315 exp.DataType.Type.BINARY: "VARBINARY", 316 exp.DataType.Type.TEXT: "VARCHAR", 317 exp.DataType.Type.TIMETZ: "TIME", 318 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 319 exp.DataType.Type.STRUCT: "ROW", 320 exp.DataType.Type.DATETIME: "TIMESTAMP", 321 exp.DataType.Type.DATETIME64: "TIMESTAMP", 322 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 323 } 324 325 TRANSFORMS = { 326 **generator.Generator.TRANSFORMS, 327 exp.AnyValue: rename_func("ARBITRARY"), 328 exp.ApproxDistinct: lambda self, e: self.func( 329 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 330 ), 331 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 332 exp.ArgMax: rename_func("MAX_BY"), 333 exp.ArgMin: rename_func("MIN_BY"), 334 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 335 exp.ArrayAny: rename_func("ANY_MATCH"), 336 exp.ArrayConcat: rename_func("CONCAT"), 337 exp.ArrayContains: rename_func("CONTAINS"), 338 exp.ArraySize: rename_func("CARDINALITY"), 339 exp.ArrayToString: rename_func("ARRAY_JOIN"), 340 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 341 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 342 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 343 
exp.BitwiseLeftShift: lambda self, e: self.func( 344 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 345 ), 346 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 347 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 348 exp.BitwiseRightShift: lambda self, e: self.func( 349 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 350 ), 351 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 352 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 353 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 354 exp.DateAdd: lambda self, e: self.func( 355 "DATE_ADD", 356 unit_to_str(e), 357 _to_int(e.expression), 358 e.this, 359 ), 360 exp.DateDiff: lambda self, e: self.func( 361 "DATE_DIFF", unit_to_str(e), e.expression, e.this 362 ), 363 exp.DateStrToDate: datestrtodate_sql, 364 exp.DateToDi: lambda self, 365 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 366 exp.DateSub: lambda self, e: self.func( 367 "DATE_ADD", 368 unit_to_str(e), 369 _to_int(e.expression * -1), 370 e.this, 371 ), 372 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 373 exp.DiToDate: lambda self, 374 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 375 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 376 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 377 exp.First: _first_last_sql, 378 exp.FirstValue: _first_last_sql, 379 exp.FromTimeZone: lambda self, 380 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 381 exp.Group: transforms.preprocess([transforms.unalias_group]), 382 exp.GroupConcat: lambda self, e: self.func( 383 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 384 ), 385 exp.Hex: rename_func("TO_HEX"), 386 exp.If: if_sql(), 387 exp.ILike: no_ilike_sql, 388 exp.Initcap: _initcap_sql, 389 exp.ParseJSON: 
rename_func("JSON_PARSE"), 390 exp.Last: _first_last_sql, 391 exp.LastValue: _first_last_sql, 392 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 393 exp.Lateral: _explode_to_unnest_sql, 394 exp.Left: left_to_substring_sql, 395 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 396 exp.LogicalAnd: rename_func("BOOL_AND"), 397 exp.LogicalOr: rename_func("BOOL_OR"), 398 exp.Pivot: no_pivot_sql, 399 exp.Quantile: _quantile_sql, 400 exp.RegexpExtract: regexp_extract_sql, 401 exp.Right: right_to_substring_sql, 402 exp.SafeDivide: no_safe_divide_sql, 403 exp.Schema: _schema_sql, 404 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 405 exp.Select: transforms.preprocess( 406 [ 407 transforms.eliminate_qualify, 408 transforms.eliminate_distinct_on, 409 transforms.explode_to_unnest(1), 410 transforms.eliminate_semi_and_anti_joins, 411 ] 412 ), 413 exp.SortArray: _no_sort_array, 414 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 415 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 416 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 417 exp.StrToTime: _str_to_time_sql, 418 exp.StructExtract: struct_extract_sql, 419 exp.Table: transforms.preprocess([_unnest_sequence]), 420 exp.Timestamp: no_timestamp_sql, 421 exp.TimestampTrunc: timestamptrunc_sql, 422 exp.TimeStrToDate: timestrtotime_sql, 423 exp.TimeStrToTime: timestrtotime_sql, 424 exp.TimeStrToUnix: lambda self, e: self.func( 425 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 426 ), 427 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 428 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 429 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 430 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 431 exp.TsOrDiToDi: lambda self, 432 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 433 
exp.TsOrDsAdd: _ts_or_ds_add_sql, 434 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 435 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 436 exp.Unhex: rename_func("FROM_HEX"), 437 exp.UnixToStr: lambda self, 438 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 439 exp.UnixToTime: _unix_to_time_sql, 440 exp.UnixToTimeStr: lambda self, 441 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 442 exp.VariancePop: rename_func("VAR_POP"), 443 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 444 exp.WithinGroup: transforms.preprocess( 445 [transforms.remove_within_group_for_percentiles] 446 ), 447 exp.Xor: bool_xor_sql, 448 } 449 450 RESERVED_KEYWORDS = { 451 "alter", 452 "and", 453 "as", 454 "between", 455 "by", 456 "case", 457 "cast", 458 "constraint", 459 "create", 460 "cross", 461 "current_time", 462 "current_timestamp", 463 "deallocate", 464 "delete", 465 "describe", 466 "distinct", 467 "drop", 468 "else", 469 "end", 470 "escape", 471 "except", 472 "execute", 473 "exists", 474 "extract", 475 "false", 476 "for", 477 "from", 478 "full", 479 "group", 480 "having", 481 "in", 482 "inner", 483 "insert", 484 "intersect", 485 "into", 486 "is", 487 "join", 488 "left", 489 "like", 490 "natural", 491 "not", 492 "null", 493 "on", 494 "or", 495 "order", 496 "outer", 497 "prepare", 498 "right", 499 "select", 500 "table", 501 "then", 502 "true", 503 "union", 504 "using", 505 "values", 506 "when", 507 "where", 508 "with", 509 } 510 511 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 512 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
513 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 514 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 515 # which seems to be using the same time mapping as Hive, as per: 516 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 517 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 518 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 519 parse_with_tz = self.func( 520 "PARSE_DATETIME", 521 value_as_text, 522 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 523 ) 524 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 525 return self.func("TO_UNIXTIME", coalesced) 526 527 def bracket_sql(self, expression: exp.Bracket) -> str: 528 if expression.args.get("safe"): 529 return self.func( 530 "ELEMENT_AT", 531 expression.this, 532 seq_get( 533 apply_index_offset( 534 expression.this, 535 expression.expressions, 536 1 - expression.args.get("offset", 0), 537 ), 538 0, 539 ), 540 ) 541 return super().bracket_sql(expression) 542 543 def struct_sql(self, expression: exp.Struct) -> str: 544 from sqlglot.optimizer.annotate_types import annotate_types 545 546 expression = annotate_types(expression) 547 values: t.List[str] = [] 548 schema: t.List[str] = [] 549 unknown_type = False 550 551 for e in expression.expressions: 552 if isinstance(e, exp.PropertyEQ): 553 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 554 unknown_type = True 555 else: 556 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 557 values.append(self.sql(e, "expression")) 558 else: 559 values.append(self.sql(e)) 560 561 size = len(expression.expressions) 562 563 if not size or len(schema) != size: 564 if unknown_type: 565 self.unsupported( 566 "Cannot convert untyped key-value definitions (try annotate_types)." 
567 ) 568 return self.func("ROW", *values) 569 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 570 571 def interval_sql(self, expression: exp.Interval) -> str: 572 if expression.this and expression.text("unit").upper().startswith("WEEK"): 573 return f"({expression.this.name} * INTERVAL '7' DAY)" 574 return super().interval_sql(expression) 575 576 def transaction_sql(self, expression: exp.Transaction) -> str: 577 modes = expression.args.get("modes") 578 modes = f" {', '.join(modes)}" if modes else "" 579 return f"START TRANSACTION{modes}" 580 581 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 582 start = expression.args["start"] 583 end = expression.args["end"] 584 step = expression.args.get("step") 585 586 if isinstance(start, exp.Cast): 587 target_type = start.to 588 elif isinstance(end, exp.Cast): 589 target_type = end.to 590 else: 591 target_type = None 592 593 if target_type and target_type.is_type("timestamp"): 594 if target_type is start.to: 595 end = exp.cast(end, target_type) 596 else: 597 start = exp.cast(start, target_type) 598 599 return self.func("SEQUENCE", start, end, step) 600 601 def offset_limit_modifiers( 602 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 603 ) -> t.List[str]: 604 return [ 605 self.sql(expression, "offset"), 606 self.sql(limit), 607 ] 608 609 def create_sql(self, expression: exp.Create) -> str: 610 """ 611 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 612 so we need to remove them 613 """ 614 kind = expression.args["kind"] 615 schema = expression.this 616 if kind == "VIEW" and schema.expressions: 617 expression.this.set("expressions", None) 618 return super().create_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
511 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 512 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 513 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 514 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 515 # which seems to be using the same time mapping as Hive, as per: 516 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 517 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 518 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 519 parse_with_tz = self.func( 520 "PARSE_DATETIME", 521 value_as_text, 522 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 523 ) 524 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 525 return self.func("TO_UNIXTIME", coalesced)
527 def bracket_sql(self, expression: exp.Bracket) -> str: 528 if expression.args.get("safe"): 529 return self.func( 530 "ELEMENT_AT", 531 expression.this, 532 seq_get( 533 apply_index_offset( 534 expression.this, 535 expression.expressions, 536 1 - expression.args.get("offset", 0), 537 ), 538 0, 539 ), 540 ) 541 return super().bracket_sql(expression)
543 def struct_sql(self, expression: exp.Struct) -> str: 544 from sqlglot.optimizer.annotate_types import annotate_types 545 546 expression = annotate_types(expression) 547 values: t.List[str] = [] 548 schema: t.List[str] = [] 549 unknown_type = False 550 551 for e in expression.expressions: 552 if isinstance(e, exp.PropertyEQ): 553 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 554 unknown_type = True 555 else: 556 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 557 values.append(self.sql(e, "expression")) 558 else: 559 values.append(self.sql(e)) 560 561 size = len(expression.expressions) 562 563 if not size or len(schema) != size: 564 if unknown_type: 565 self.unsupported( 566 "Cannot convert untyped key-value definitions (try annotate_types)." 567 ) 568 return self.func("ROW", *values) 569 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
581 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 582 start = expression.args["start"] 583 end = expression.args["end"] 584 step = expression.args.get("step") 585 586 if isinstance(start, exp.Cast): 587 target_type = start.to 588 elif isinstance(end, exp.Cast): 589 target_type = end.to 590 else: 591 target_type = None 592 593 if target_type and target_type.is_type("timestamp"): 594 if target_type is start.to: 595 end = exp.cast(end, target_type) 596 else: 597 start = exp.cast(start, target_type) 598 599 return self.func("SEQUENCE", start, end, step)
609 def create_sql(self, expression: exp.Create) -> str: 610 """ 611 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 612 so we need to remove them 613 """ 614 kind = expression.args["kind"] 615 schema = expression.this 616 if kind == "VIEW" and schema.expressions: 617 expression.this.set("expressions", None) 618 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: in CREATE VIEW x (cola), (cola) is the expression), so we need to remove them.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql