sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    if isinstance(expression.this, exp.TsOrDsToDate):
        return this
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: (
                        node.replace(exp.DataType.build("text"))
                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                        else node
                    ),
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                (
                    self._parse_csv(self._parse_conjunction)
                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                    else []
                ),
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: (
                "" if e.args.get("allow_null") else "NOT NULL"
            ),
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
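A quick usage sketch (not part of the module) showing a few of the rules above through sqlglot's public transpile API; the printed strings indicate what these transforms produce rather than verbatim guarantees:

import sqlglot

# _create_sql rewrites CTAS on a temporary table into CREATE TEMPORARY VIEW.
print(sqlglot.transpile("CREATE TEMPORARY TABLE t AS SELECT 1", write="hive")[0])
# e.g. CREATE TEMPORARY VIEW t AS SELECT 1

# TYPE_MAPPING renders TEXT as STRING, and no_ilike_sql lowers ILIKE to LOWER(...) LIKE.
print(sqlglot.transpile("SELECT CAST(a AS TEXT) FROM t WHERE b ILIKE '%x%'", write="hive")[0])
# e.g. SELECT CAST(a AS STRING) FROM t WHERE LOWER(b) LIKE '%x%'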
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
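The helpers _add_date_sql and _date_diff_sql use these tables to rewrite units that Hive lacks natively into ADD_MONTHS/DATE_ADD calls with the multiplier folded in. A hedged sketch, assuming a T-SQL DATEADD expression as the input form:

import sqlglot

# QUARTER maps to ("ADD_MONTHS", 3), so the literal 2 is scaled to 6 months.
print(sqlglot.transpile("SELECT DATEADD(QUARTER, 2, d)", read="tsql", write="hive")[0])
# e.g. SELECT ADD_MONTHS(d, 6)

# WEEK maps to ("DATE_ADD", 7), so one week becomes a 7-day DATE_ADD.
print(sqlglot.transpile("SELECT DATEADD(WEEK, 1, d)", read="tsql", write="hive")[0])
# e.g. SELECT DATE_ADD(d, 7)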
IDENTIFIERS_CAN_START_WITH_DIGIT = True
Determines whether or not an unquoted identifier can start with a digit.
SUPPORTS_USER_DEFINED_TYPES = False
Determines whether or not user-defined data types are supported.
SAFE_DIVISION = True
Determines whether division by zero throws an error (False) or returns NULL (True).
NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>
Specifies the strategy according to which identifiers should be normalized.
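A small, hedged illustration of the case-insensitive normalization strategy, using sqlglot's identifier-normalization helper from the optimizer package:

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Hive/Spark identifiers are case-insensitive, so unquoted names normalize to lowercase.
expr = sqlglot.parse_one("SELECT Col FROM MyTable", read="hive")
print(normalize_identifiers(expr, dialect="hive").sql(dialect="hive"))
# e.g. SELECT col FROM mytable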
TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
Associates this dialect's time formats with their equivalent Python strftime format.
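TIME_MAPPING is what lets Hive's Java-style format tokens round-trip through sqlglot's internal strftime representation. A hedged example; the exact shape of the duckdb output is approximate:

import sqlglot

# 'yyyy-MM-dd HH:mm:ss' is translated token by token into '%Y-%m-%d %H:%M:%S'.
sql = "SELECT DATE_FORMAT(d, 'yyyy-MM-dd HH:mm:ss')"
print(sqlglot.transpile(sql, read="hive", write="duckdb")[0])
# e.g. SELECT STRFTIME(CAST(d AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')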
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- CONCAT_COALESCE
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: (
                    node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node
                ),
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else []
            ),
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
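As the `_parse_types` docstring above explains, parameterized CHAR/VARCHAR casts are widened to STRING outside of schema definitions. A minimal sketch of that behavior through the public API (the commented results are what the transform above implies, not output from a verified run):

    import sqlglot

    # Outside a schema, VARCHAR(2) is widened to STRING when read as Hive.
    print(sqlglot.parse_one("SELECT CAST(1234 AS VARCHAR(2))", read="hive").sql(dialect="hive"))
    # SELECT CAST(1234 AS STRING)

    # Inside a schema definition the length is preserved.
    print(sqlglot.parse_one("CREATE TABLE t (c VARCHAR(2))", read="hive").sql(dialect="hive"))
    # CREATE TABLE t (c VARCHAR(2))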
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
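A hedged usage sketch: these options are ordinary Parser keyword arguments, and the top-level sqlglot.parse entry point forwards them to Hive.Parser when read="hive" (the query below is only illustrative):

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # Parser options such as error_level and max_errors are passed through
    # to Hive.Parser by the top-level API.
    expressions = sqlglot.parse(
        "SELECT COLLECT_SET(name) FROM users",
        read="hive",
        error_level=ErrorLevel.RAISE,
        max_errors=5,
    )
    print(expressions[0].sql(dialect="hive"))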
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GET_PATH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GetPath'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function parse_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
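The practical effect of the Hive-specific entries in FUNCTIONS is that Hive function names parse into generic sqlglot expressions. A small illustrative check:

    import sqlglot
    from sqlglot import exp

    node = sqlglot.parse_one("SELECT PERCENTILE(x, 0.5), COLLECT_LIST(y) FROM t", read="hive")
    assert list(node.find_all(exp.Quantile))   # PERCENTILE  -> exp.Quantile
    assert list(node.find_all(exp.ArrayAgg))   # COLLECT_LIST -> exp.ArrayAgg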
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: (
            "" if e.args.get("allow_null") else "NOT NULL"
        ),
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
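A hedged example of a couple of the mappings above when generating Hive SQL (the commented outputs follow from TYPE_MAPPING and TRANSFORMS rather than from a verified run):

    import sqlglot

    # DATETIME is mapped to TIMESTAMP by TYPE_MAPPING.
    print(sqlglot.transpile("SELECT CAST(x AS DATETIME)", write="hive")[0])
    # SELECT CAST(x AS TIMESTAMP)

    # exp.ArraySize is renamed to Hive's SIZE() by TRANSFORMS.
    print(sqlglot.transpile("SELECT ARRAY_SIZE(x)", write="hive")[0])
    # SELECT SIZE(x)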
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
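A hedged usage sketch: these generator options are forwarded to Hive.Generator by the top-level sqlglot.transpile entry point (the query is only illustrative):

    import sqlglot

    sql = sqlglot.transpile(
        "SELECT a AS b FROM db.t WHERE x > 1",
        write="hive",
        pretty=True,    # format the output
        identify=True,  # always quote identifiers (backticks in Hive)
    )[0]
    print(sql)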
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Subquery'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: 
<function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def
temporary_storage_provider( self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
551 def parameter_sql(self, expression: exp.Parameter) -> str: 552 this = self.sql(expression, "this") 553 expression_sql = self.sql(expression, "expression") 554 555 parent = expression.parent 556 this = f"{this}:{expression_sql}" if expression_sql else this 557 558 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 559 # We need to produce SET key = value instead of SET ${key} = value 560 return this 561 562 return f"${{{this}}}"
def
rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        # An unsized VARCHAR/NVARCHAR is rebuilt as TEXT, which Hive renders as STRING
        expression = exp.DataType.build("text")
    elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
        # A sized TEXT is treated as VARCHAR so the length is preserved
        expression.set("this", exp.DataType.Type.VARCHAR)
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        # Temporal types are rebuilt without precision/parameters
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            # FLOAT(p) stays FLOAT for p <= 32, otherwise it widens to DOUBLE
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
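Per the branches above: an unsized VARCHAR becomes Hive's STRING, a sized VARCHAR keeps its length, and FLOAT with a precision above 32 widens to DOUBLE. A small, hedged check (assumes sqlglot is installed):

import sqlglot

for expr in ("CAST(x AS VARCHAR)", "CAST(x AS VARCHAR(10))", "CAST(x AS FLOAT(53))"):
    print(sqlglot.transpile(expr, write="hive")[0])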
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql