sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
        return self.func("DATE_ADD", expression.this, expression.expression)

    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between or multiplier_sql:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
        # For the same reason, we want to truncate if there's a divisor present.
        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"

    return diff_sql


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _add_date_sql,
            exp.TsOrDsDiff: _date_diff_sql,
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
                expression.set("this", exp.DataType.Type.VARCHAR)
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
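Example (illustrative sketch, not part of the module; the table and column names are made up): parse Hive SQL through this dialect and re-render it, including the CHAR/VARCHAR-to-STRING widening documented in Hive.Parser._parse_types.

import sqlglot

# COLLECT_SET is registered in Hive.Parser.FUNCTIONS as exp.ArrayUniqueAgg, and
# Hive.Generator.TRANSFORMS renders that node back as COLLECT_SET.
ast = sqlglot.parse_one("SELECT COLLECT_SET(name) FROM people", read="hive")
print(ast.sql(dialect="hive"))    # SELECT COLLECT_SET(name) FROM people
print(ast.sql(dialect="duckdb"))  # re-rendered with that engine's equivalent aggregate

# Casts to VARCHAR(n) are widened to STRING outside of schema definitions,
# per the _parse_types docstring above.
print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(2)) FROM t", read="hive", write="hive")[0])
# Expected: SELECT CAST(x AS STRING) FROM t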
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
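Worked example (a hedged sketch; the column name dt is invented): DATE_DELTA_INTERVAL is what lets _add_date_sql fold a unit into Hive's ADD_MONTHS/DATE_ADD, multiplying numeric increments and flipping the sign for DateSub.

from sqlglot import exp

# QUARTER maps to ("ADD_MONTHS", 3), so a numeric increment of 2 becomes 6.
date_add = exp.DateAdd(
    this=exp.column("dt"),
    expression=exp.Literal.number(2),
    unit=exp.var("QUARTER"),
)
print(date_add.sql(dialect="hive"))  # ADD_MONTHS(dt, 6)

# WEEK maps to ("DATE_ADD", 7); DateSub negates the multiplier.
date_sub = exp.DateSub(
    this=exp.column("dt"),
    expression=exp.Literal.number(1),
    unit=exp.var("WEEK"),
)
print(date_sub.sql(dialect="hive"))  # DATE_ADD(dt, -7)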
class Hive(Dialect):
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
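TIME_MAPPING translates Hive/Spark's Java SimpleDateFormat tokens into the strftime-style tokens sqlglot uses internally. A hedged sketch (the exact output depends on the target dialect's formatter):

import sqlglot

# 'yyyy-MM-dd HH:mm:ss' is rewritten token by token ('yyyy' -> '%Y', 'MM' -> '%m', ...)
# before the target dialect converts it into its own format syntax.
print(sqlglot.transpile(
    "SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss')",
    read="hive",
    write="duckdb",
)[0])
# Roughly: SELECT STRFTIME(CAST(ts AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')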
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
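INVERSE_TIME_MAPPING is applied in the other direction, when Hive SQL is being generated. A hedged sketch (output shown approximately):

import sqlglot

# A strftime-style format coming from DuckDB is converted back into Hive's
# 'yyyy-MM-dd' notation through INVERSE_TIME_MAPPING.
print(sqlglot.transpile(
    "SELECT STRFTIME(ts, '%Y-%m-%d')",
    read="duckdb",
    write="hive",
)[0])
# Roughly: SELECT DATE_FORMAT(ts, 'yyyy-MM-dd')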
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
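The inherited Dialect entry points listed above (tokenize, parse, generate, transpile) can be exercised directly on a Hive instance. A brief hedged sketch with made-up table and column names:

from sqlglot.dialects.hive import Hive

hive = Hive()

# tokenize -> list of Tokens; backtick identifiers may start with a digit
# (IDENTIFIERS_CAN_START_WITH_DIGIT = True).
tokens = hive.tokenize("SELECT `2col` FROM t")
print([token.token_type for token in tokens][:4])

# parse -> list of expression trees; generate -> SQL text
expressions = hive.parse("SELECT a, b FROM db.t")
print(hive.generate(expressions[0]))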
class Hive.Tokenizer(tokens.Tokenizer):
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 
'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 
'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 
'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
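Keywords this tokenizer maps to TokenType.COMMAND (ADD JAR, MSCK REPAIR, ...) make the parser wrap the whole statement in exp.Command rather than parsing it in detail. A hedged sketch:

import sqlglot

# Both statements are expected to come back out essentially unchanged.
for sql in ("MSCK REPAIR TABLE db.t", "ADD JAR /tmp/udfs.jar"):
    stmt = sqlglot.parse_one(sql, read="hive")
    print(type(stmt).__name__, "->", stmt.sql(dialect="hive"))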
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

        spark-sql (default)> select cast(1234 as varchar(2));
        23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
        char/varchar type and simply treats them as string type. Please use string type
        directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
        to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

        1234
        Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
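As a quick illustration of the _parse_types behavior documented above, a length-parameterized character type in a cast should come out as STRING when transpiling through this dialect. A minimal sketch using sqlglot's public transpile API (exact output may vary between sqlglot versions):

    import sqlglot

    # VARCHAR(2) is parsed as plain text outside of a schema, which Hive renders as STRING
    print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])
    # expected along the lines of: SELECT CAST(1234 AS STRING)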
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
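The parser is usually reached through sqlglot's top-level helpers rather than instantiated directly. A minimal sketch (the query and column names are invented for illustration):

    import sqlglot
    from sqlglot import exp

    # COLLECT_SET is registered in FUNCTIONS below, so it parses into an ArrayUniqueAgg node
    ast = sqlglot.parse_one("SELECT COLLECT_SET(x) FROM t", read="hive")
    print(ast.find(exp.ArrayUniqueAgg))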
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 
'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 
'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
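The Hive-specific TRANSFORM entry above routes SELECT TRANSFORM ... USING into _parse_transform. A hedged sketch of the effect (query invented for illustration; behavior may differ across sqlglot versions):

    import sqlglot
    from sqlglot import exp

    # With a USING clause the parser should build a QueryTransform rather than a plain Transform
    ast = sqlglot.parse_one(
        "SELECT TRANSFORM(a, b) USING 'cat' AS (x STRING, y STRING) FROM t", read="hive"
    )
    print(isinstance(ast.selects[0], exp.QueryTransform))  # expected: True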
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
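TIME_MAPPING above is what lets Hive format strings be translated into other dialects' conventions. A hedged sketch (target dialect chosen arbitrarily; the surrounding output shape may differ by sqlglot version):

    import sqlglot

    # 'yyyy-MM-dd' should be rewritten token by token ('yyyy' -> '%Y', 'MM' -> '%m', 'dd' -> '%d')
    sql = sqlglot.transpile(
        "SELECT DATE_FORMAT(ds, 'yyyy-MM-dd') FROM t", read="hive", write="duckdb"
    )[0]
    print(sql)  # the format literal should appear as '%Y-%m-%d' in the output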
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- TYPED_DIVISION
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _add_date_sql,
        exp.TsOrDsDiff: _date_diff_sql,
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
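To make one of the overrides above concrete, here is a hedged sketch of arrayagg_sql in action (column name invented; the output is what the code suggests, not a guarantee across versions):

    import sqlglot

    # ArrayAgg is emitted as COLLECT_LIST by arrayagg_sql
    print(sqlglot.transpile("SELECT ARRAY_AGG(x) FROM t", write="hive")[0])
    # expected along the lines of: SELECT COLLECT_LIST(x) FROM t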
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
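In practice the generator is driven through sqlglot.transpile, which accepts these options as keyword arguments. A minimal sketch (query invented for illustration):

    import sqlglot

    # ARRAY_SIZE is renamed to SIZE by the Hive TRANSFORMS table; pretty=True exercises the
    # formatting options documented above
    print(sqlglot.transpile("SELECT ARRAY_SIZE(x) FROM t", write="hive", pretty=True)[0])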
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Insert'>, <class 'sqlglot.expressions.Subquery'>, <class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Union'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
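A hedged sketch of how this mapping shows up in generated DDL (types chosen to hit the TEXT and TIMESTAMPTZ entries; exact output may vary by version):

    import sqlglot

    # TEXT becomes STRING and TIMESTAMPTZ collapses to TIMESTAMP on the way out
    print(sqlglot.transpile("CREATE TABLE t (a TEXT, b TIMESTAMPTZ)", write="hive")[0])
    # expected along the lines of: CREATE TABLE t (a STRING, b TIMESTAMP)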
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 
'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
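For example, PartitionedByProperty is pinned to POST_SCHEMA above, so partition clauses stay after the column list. A hedged round-trip sketch (table definition invented; output shown as expected, not guaranteed):

    import sqlglot

    # PARTITIONED BY should be emitted after the schema when writing Hive DDL
    print(
        sqlglot.transpile(
            "CREATE TABLE t (a INT) PARTITIONED BY (ds STRING)", read="hive", write="hive"
        )[0]
    )
    # expected along the lines of: CREATE TABLE t (a INT) PARTITIONED BY (ds STRING)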
def temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

def parameter_sql(self, expression: sqlglot.expressions.Parameter) -> str:

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"
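A hedged sketch of what parameter_sql produces for a standalone parameter (the expression is built by hand here and the variable name is invented):

    from sqlglot import exp
    from sqlglot.dialects.hive import Hive

    # Outside of a SET item, parameters are wrapped as ${...}
    param = exp.Parameter(this=exp.Var(this="hivevar_name"))
    print(Hive().generate(param))  # expected along the lines of: ${hivevar_name}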
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

def datatype_sql(self, expression: sqlglot.expressions.DataType) -> str:

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
            expression.set("this", exp.DataType.Type.VARCHAR)
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)
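The float branch in datatype_sql widens a sized FLOAT when the size exceeds 32 bits. A hedged sketch (expected output, not guaranteed across versions):

    import sqlglot

    # FLOAT(53) exceeds 32 bits, so it should come out as DOUBLE
    print(sqlglot.transpile("SELECT CAST(x AS FLOAT(53))", write="hive")[0])
    # expected along the lines of: SELECT CAST(x AS DOUBLE)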
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
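These two attributes drive format-string conversion when writing Hive SQL: strftime-style specifiers are mapped back to Java SimpleDateFormat patterns, and the trie is used for longest-match lookups during that rewrite. A small sketch of the effect (only the rewritten format literal is claimed here, since the surrounding expression may differ between versions):

import sqlglot

# MySQL's DATE_FORMAT uses strftime-like codes; on the Hive side the format
# should come out as the Java-style pattern 'yyyy-MM-dd'.
sql = sqlglot.transpile("SELECT DATE_FORMAT(d, '%Y-%m-%d')", read="mysql", write="hive")[0]
print(sql)  # the format literal should now read 'yyyy-MM-dd'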
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
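Because this is a classmethod, it can be called straight off the generator class documented here. A small sketch of the documented behaviour (the "safe" outcome depends on how the dialect normalizes identifier case, which is an assumption here):

from sqlglot.dialects.hive import Hive

# "always" (or True) unconditionally allows identification.
print(Hive.Generator.can_identify("my_col", identify="always"))  # True
print(Hive.Generator.can_identify("my_col", identify=True))      # True

# The default "safe" policy accepts identifiers whose casing survives normalization;
# an all-lowercase name is safe under Hive's case-insensitive handling (assumption).
print(Hive.Generator.can_identify("my_col"))  # True

# Any other value falls through to False.
print(Hive.Generator.can_identify("my_col", identify=False))  # False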
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- TYPED_DIVISION
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITONS
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
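Most of the members listed above are generic generation hooks inherited unchanged from the base Generator; they are exercised implicitly whenever SQL is generated for the hive dialect, for example through sqlglot.transpile. A minimal usage sketch:

import sqlglot

# Inherited machinery (generate, select_sql, where_sql, pretty printing, ...)
# is used automatically when targeting the hive dialect.
print(sqlglot.transpile("SELECT 1 AS x FROM t WHERE y > 0", write="hive", pretty=True)[0])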