sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.transforms import (
    remove_unique_constraints,
    ctas_with_tmp_tables_to_create_tmp_view,
    preprocess,
    move_schema_columns_to_partitioned_by,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return self.func("SORT_ARRAY", expression.this)


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return self.func("TO_DATE", expression.this, time_format)

    if isinstance(expression.this, exp.TsOrDsToDate):
        return self.sql(expression, "this")

    return self.func("TO_DATE", expression.this)


def _build_with_ignore_nulls(
    exp_class: t.Type[exp.Expression],
) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
        this = exp_class(this=seq_get(args, 0))
        if seq_get(args, 1) == exp.true():
            return exp.IgnoreNulls(this=this)
        return this

    return _parse


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FIRST": _build_with_ignore_nulls(exp.First),
            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LAST": _build_with_ignore_nulls(exp.Last),
            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
            "LOCATE": locate_to_strposition,
            "MAP": parser.build_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": build_formatted_time(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                (
                    self._parse_csv(self._parse_conjunction)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self,
            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self,
            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "GET_JSON_OBJECT", e.this, e.expression
            ),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda _, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: preprocess(
                [
                    remove_unique_constraints,
                    ctas_with_tmp_tables_to_create_tmp_view,
                    move_schema_columns_to_partitioned_by,
                ]
            ),
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: self.func(
                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
            ),
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self,
            e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda *_: "",
            exp.OnProperty: lambda *_: "",
            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
            if isinstance(expression.this, exp.JSONPathWildcard):
                self.unsupported("Unsupported wildcard in JSONPathKey expression")
                return ""

            return super()._jsonpathkey_sql(expression)

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
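In practice the dialect is exercised through sqlglot's top-level helpers rather than by calling these functions directly. A minimal sketch, with an illustrative query and table/column names:

import sqlglot

# DATEDIFF and DATE_ADD are mapped onto dialect-agnostic expressions
# (exp.DateDiff, exp.TsOrDsAdd) by Hive.Parser and rendered back by Hive.Generator.
hive_sql = "SELECT DATEDIFF(end_dt, start_dt), DATE_ADD(start_dt, 7) FROM events"

print(sqlglot.transpile(hive_sql, read="hive", write="spark")[0])
print(sqlglot.parse_one(hive_sql, read="hive").sql(dialect="hive"))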
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
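These tables drive _add_date_sql and _date_diff_sql: units without a native Hive function are rewritten in terms of ADD_MONTHS or DATE_ADD plus a multiplier. A small sketch of the effect, using an illustrative column name:

from sqlglot import exp

# WEEK is not a Hive DATE_ADD unit, so the generator looks up ("DATE_ADD", 7)
# in DATE_DELTA_INTERVAL and folds the multiplier into the literal increment.
node = exp.DateAdd(
    this=exp.column("start_dt"),
    expression=exp.Literal.number(2),
    unit=exp.var("WEEK"),
)
print(node.sql(dialect="hive"))  # expected along the lines of DATE_ADD(start_dt, 14)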
class Hive(Dialect):
NORMALIZATION_STRATEGY =
<NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>
Specifies the strategy according to which identifiers should be normalized.
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
Associates this dialect's time formats with their equivalent Python strftime formats.
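For example, Hive/Spark format tokens are translated through this mapping whenever a formatted-time function crosses dialects. A sketch, with illustrative identifiers; the exact output depends on the target dialect and sqlglot version:

import sqlglot

# 'yyyy-MM-dd HH:mm:ss' is rewritten token by token via TIME_MAPPING into
# strftime-style '%Y-%m-%d %H:%M:%S' for dialects that use that notation.
sql = "SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss') FROM t"
print(sqlglot.transpile(sql, read="hive", write="duckdb")[0])

# Going the other way relies on INVERSE_TIME_MAPPING below.
print(sqlglot.transpile("SELECT STRFTIME(ts, '%Y-%m-%d')", read="duckdb", write="hive")[0])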
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- CONCAT_COALESCE
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
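A short sketch of the inherited conveniences listed above, using an illustrative query:

from sqlglot.dialects.hive import Hive

dialect = Hive()

# parse() and generate() come from Dialect; they wire together the
# tokenizer_class/parser_class/generator_class attributes documented above.
ast = dialect.parse("SELECT COLLECT_SET(x) FROM t")[0]
print(dialect.generate(ast))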
class Tokenizer(tokens.Tokenizer):
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 
'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 
'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 
'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
NUMERIC_LITERALS =
{'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
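A rough sketch of what these tokenizer settings mean when reading Hive SQL; the identifiers are illustrative and the exact rendering depends on the sqlglot version:

import sqlglot

# Backticks quote identifiers, and the L/BD suffixes are re-typed using
# NUMERIC_LITERALS (BIGINT and DECIMAL respectively) during tokenization.
print(sqlglot.transpile("SELECT `order`, 10L, 2.5BD FROM t", read="hive", write="duckdb")[0])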
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- COMMENTS
- dialect
- reset
- tokenize
- peek
- tokenize_rs
- size
- sql
- tokens
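The tokenizer can also be used on its own through the dialect instance; for instance, the multi-word Hive commands registered in KEYWORDS surface as single tokens. A sketch, with an illustrative jar path:

from sqlglot.dialects.hive import Hive

# "ADD JAR" is a single COMMAND token per the KEYWORDS table above.
for token in Hive().tokenizer.tokenize("ADD JAR 's3://bucket/udfs.jar'"):
    print(token.token_type, token.text)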
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False
    VALUES_FOLLOWED_BY_PAREN = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FIRST": _build_with_ignore_nulls(exp.First),
        "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
        "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LAST": _build_with_ignore_nulls(exp.Last),
        "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
        "LOCATE": locate_to_strposition,
        "MAP": parser.build_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": build_formatted_time(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

        spark-sql (default)> select cast(1234 as varchar(2));
        23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
        char/varchar type and simply treats them as string type. Please use string type
        directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
        to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

        1234
        Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: (
                    node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node
                ),
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else []
            ),
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
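The mappings above fold Hive-specific surface syntax into sqlglot's canonical expression nodes. A minimal sketch of the effect (not part of the module; the printed SQL is indicative and may vary slightly between sqlglot versions):

import sqlglot
from sqlglot import exp

# DATE_ADD is registered to build a canonical TsOrDsAdd node with a DAY unit.
ast = sqlglot.parse_one("SELECT DATE_ADD('2020-01-01', 5)", read="hive")
assert ast.find(exp.TsOrDsAdd) is not None

# _parse_types drops CHAR/VARCHAR lengths outside of schema definitions, so the
# cast below is expected to round-trip as STRING rather than VARCHAR(2).
print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])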
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
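These options are usually threaded through the dialect rather than by instantiating Hive.Parser directly. A hedged sketch of both routes, assuming the standard sqlglot entry points:

import sqlglot
from sqlglot import ErrorLevel
from sqlglot.dialects.hive import Hive

sql = "SELECT COLLECT_LIST(x) FROM t"

# Via the high-level API: parser options are forwarded as keyword arguments.
expression = sqlglot.parse_one(sql, read="hive", error_level=ErrorLevel.RAISE)

# Or explicitly: tokenize with the Hive tokenizer, then parse with a configured parser.
dialect = Hive()
tokens = dialect.tokenize(sql)
parsed = dialect.parser(error_level=ErrorLevel.RAISE, max_errors=5).parse(tokens, sql)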
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _build_with_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function build_extract_json_with_path.<locals>._builder>, 'JSON_EXTRACT_SCALAR': <function build_extract_json_with_path.<locals>._builder>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _build_with_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function build_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function build_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list 
of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 
'VAR_MAP': <function build_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function build_extract_json_with_path.<locals>._builder>, 'LIKE': <function build_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function build_formatted_time.<locals>._builder>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function build_formatted_time.<locals>._builder>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function build_formatted_time.<locals>._builder>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
        exp.JSONPathWildcard,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self,
        e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self,
        e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "GET_JSON_OBJECT", e.this, e.expression
        ),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda _, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: preprocess(
            [
                remove_unique_constraints,
                ctas_with_tmp_tables_to_create_tmp_view,
                move_schema_columns_to_partitioned_by,
            ]
        ),
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: self.func(
            "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
        ),
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self,
        e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda *_: "",
        exp.OnProperty: lambda *_: "",
        exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
        if isinstance(expression.this, exp.JSONPathWildcard):
            self.unsupported("Unsupported wildcard in JSONPathKey expression")
            return ""

        return super()._jsonpathkey_sql(expression)

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
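Most Hive-specific output behaviour lives in the TRANSFORMS table above. A small, hedged illustration of a few of those rewrites via the public transpile API (the exact strings printed are indicative, not guaranteed):

import sqlglot

# exp.TryCast -> no_trycast_sql: Hive has no TRY_CAST, so a plain CAST is emitted.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", write="hive")[0])

# exp.ArraySize -> SIZE: DuckDB's ARRAY_LENGTH is renamed for Hive.
print(sqlglot.transpile("SELECT ARRAY_LENGTH([1, 2, 3])", read="duckdb", write="hive")[0])

# exp.ILike -> no_ilike_sql: ILIKE is lowered to a case-normalized LIKE.
print(sqlglot.transpile("SELECT x FROM t WHERE x ILIKE '%a%'", read="duckdb", write="hive")[0])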
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
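Generator options such as pretty and identify are normally passed through the high-level sqlglot API rather than by constructing Hive.Generator by hand. A minimal sketch, assuming the standard entry points:

import sqlglot
from sqlglot.dialects.hive import Hive

sql = "SELECT a, COUNT(*) AS c FROM db.t WHERE a IS NOT NULL GROUP BY a"

# Options are forwarded from transpile() to the generator.
print(sqlglot.transpile(sql, write="hive", pretty=True)[0])

# Equivalently, build a configured Hive generator and feed it a parsed tree.
gen = Hive().generator(pretty=True, identify=True)
print(gen.generate(sqlglot.parse_one(sql)))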
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Subquery'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Select'>}
SUPPORTED_JSON_PATH_PARTS =
{<class 'sqlglot.expressions.JSONPathKey'>, <class 'sqlglot.expressions.JSONPathSubscript'>, <class 'sqlglot.expressions.JSONPathWildcard'>, <class 'sqlglot.expressions.JSONPathRoot'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: 
<function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
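A rough illustration of the two branches above, building the expressions by hand; the exact output formatting may vary slightly between sqlglot versions:

    from sqlglot import exp

    # A standalone parameter keeps the ${...} wrapper.
    param = exp.Parameter(this=exp.Var(this="hivevar"), expression=exp.Var(this="x"))
    print(param.sql(dialect="hive"))  # roughly: ${hivevar:x}

    # Inside SET key = value the wrapper is dropped, so Hive sees a plain key.
    assignment = exp.EQ(
        this=exp.Parameter(this=exp.Var(this="x")),
        expression=exp.Literal.number(1),
    )
    stmt = exp.Set(expressions=[exp.SetItem(this=assignment)])
    print(stmt.sql(dialect="hive"))  # roughly: SET x = 1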
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
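For context, a sketch of how a SERDE clause round-trips through the Hive dialect; the exact formatting of the output is not guaranteed to match this character for character:

    import sqlglot

    ddl = (
        "CREATE TABLE t (a INT) "
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' "
        "WITH SERDEPROPERTIES ('separatorChar'=',')"
    )
    # Parsing and regenerating with the Hive dialect preserves the
    # ROW FORMAT SERDE and SERDEPROPERTIES clauses.
    print(sqlglot.transpile(ddl, read="hive", write="hive")[0])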
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
        expression.set("this", exp.DataType.Type.VARCHAR)
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
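A rough sketch of these type rules in practice; the printed results are approximate and assume the default read dialect:

    import sqlglot

    # VARCHAR without a size collapses to TEXT, which Hive renders as STRING.
    print(sqlglot.transpile("CAST(x AS VARCHAR)", write="hive")[0])    # CAST(x AS STRING)

    # FLOAT with an explicit precision above 32 bits is widened to DOUBLE.
    print(sqlglot.transpile("CAST(x AS FLOAT(53))", write="hive")[0])  # CAST(x AS DOUBLE)
    print(sqlglot.transpile("CAST(x AS FLOAT(24))", write="hive")[0])  # CAST(x AS FLOAT)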
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- CAN_IMPLEMENT_ARRAY_ANY
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql