sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part
        diff_sql = f"CAST({diff_sql} AS INT)"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        SUPPORTS_NESTED_CTES = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
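A minimal usage sketch (illustrative, not part of the module source): the dialect is selected by name through sqlglot's top-level API. Table and column names are placeholders, and exact output strings can vary between sqlglot versions.

import sqlglot

# Hive -> Spark: backtick identifiers and Hive built-ins are understood on the way in.
print(sqlglot.transpile("SELECT COLLECT_LIST(x) FROM `db`.`t`", read="hive", write="spark")[0])

# -> Hive: constructs Hive lacks are rewritten by the TRANSFORMS above,
# e.g. ILIKE is expected to become LOWER(...) LIKE ...
print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", write="hive")[0])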
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
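A hedged sketch of how these module-level tables are applied by _add_date_sql and _date_diff_sql when generating Hive SQL (identifiers are placeholders; exact output may differ between sqlglot versions):

import sqlglot

# WEEK has no native Hive unit, so DATE_DELTA_INTERVAL rewrites it as a day multiple,
# roughly DATE_ADD(x, 14) here.
print(sqlglot.transpile("SELECT DATE_ADD(x, 2, 'WEEK')", write="hive")[0])

# Sub-day units go through TIME_DIFF_FACTOR, roughly
# (UNIX_TIMESTAMP(end_ts) - UNIX_TIMESTAMP(start_ts)) / 3600 here.
print(sqlglot.transpile("SELECT DATEDIFF(end_ts, start_ts, 'HOUR')", write="hive")[0])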
class Hive(sqlglot.dialects.dialect.Dialect):
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
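TIME_MAPPING translates Hive/Java-style format tokens into the strftime-style tokens sqlglot uses internally. A small sketch (assuming a DuckDB target; the Hive pattern should come out as its strftime equivalent, e.g. '%Y-%m-%d %H:%M:%S'):

import sqlglot

# The Hive format string is converted via TIME_MAPPING and re-rendered for the target dialect.
print(sqlglot.transpile("SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss')", read="hive", write="duckdb")[0])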
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
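The inherited Dialect helpers can be used directly on a Hive instance. A brief sketch (method availability and normalization details depend on the sqlglot version):

from sqlglot import exp
from sqlglot.dialects.hive import Hive

hive = Hive()

# parse() and generate() come from the Dialect base class listed above.
expression = hive.parse("SELECT `a` FROM t WHERE x RLIKE '^h'")[0]
print(hive.generate(expression))

# With RESOLVES_IDENTIFIERS_AS_UPPERCASE = None, unquoted identifiers are lowercased.
print(hive.normalize_identifier(exp.to_identifier("FOO")).name)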
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': 
<TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': 
<TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 
'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
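A short sketch of these tokenizer settings in practice (placeholder identifiers; the exact quoting on the output side depends on the write dialect):

import sqlglot

# Backticks are Hive's only identifier delimiter, and double quotes delimit strings,
# so the identifier and the string literal are kept apart when transpiling.
print(sqlglot.transpile('SELECT `col`, "a string" FROM `t`', read="hive", write="postgres")[0])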
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
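The CHAR/VARCHAR handling in _parse_types above can be checked by round-tripping a parameterized cast through the Hive dialect. A minimal sketch using the public API (the expected output is indicative and may vary slightly between sqlglot versions):

import sqlglot

# VARCHAR(2) is parsed as STRING outside of schema definitions, so the length is dropped
sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")
# expected: ['SELECT CAST(1234 AS STRING)']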
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
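For example, these options are plain keyword arguments to sqlglot's top-level entry points when parsing Hive SQL (a minimal sketch using the public API):

import sqlglot
from sqlglot.errors import ErrorLevel

# parse a Hive statement and raise a ParseError instead of logging issues
expression = sqlglot.parse_one(
    "SELECT COLLECT_SET(col) FROM db.tbl",
    read="hive",
    error_level=ErrorLevel.RAISE,
)
print(repr(expression))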
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': 
<function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
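As a quick illustration of the Hive-specific entries above, COLLECT_LIST is parsed into the dialect-agnostic exp.ArrayAgg node, which other dialects then render with their own aggregate name. A rough sketch (the DuckDB output shown is indicative, not guaranteed to match byte for byte):

import sqlglot

ast = sqlglot.parse_one("SELECT COLLECT_LIST(x) FROM t", read="hive")
print(type(ast.selects[0]).__name__)  # expected: ArrayAgg

# other dialects pick their own spelling for the same node
print(sqlglot.transpile("SELECT COLLECT_LIST(x) FROM t", read="hive", write="duckdb")[0])
# expected along the lines of: SELECT ARRAY_AGG(x) FROM t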
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
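These Java SimpleDateFormat-style tokens are what translate Hive format strings into strftime-style tokens when a query crosses dialects. A hedged sketch (the target dialect "presto" and the exact output shape are illustrative):

import sqlglot

# 'yyyy-MM-dd' should be rewritten token by token ('yyyy' -> '%Y', 'MM' -> '%m', 'dd' -> '%d')
print(sqlglot.transpile("SELECT DATE_FORMAT(dt, 'yyyy-MM-dd')", read="hive", write="presto")[0])
# expected along the lines of: SELECT DATE_FORMAT(CAST(dt AS TIMESTAMP), '%Y-%m-%d')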
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- TYPED_DIVISION
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    SUPPORTS_NESTED_CTES = False

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
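For example, these options are ordinary keyword arguments to the public transpile/generate helpers (a minimal sketch):

import sqlglot

# pretty-print the Hive output; `pretty`, `identify`, etc. are passed through to the generator
print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="hive", pretty=True)[0])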
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 
'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
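To see one of these transforms in action: exp.ApproxDistinct is routed through approx_count_distinct_sql, so Presto's APPROX_DISTINCT should come out as Hive's APPROX_COUNT_DISTINCT. A rough sketch (the expected output is indicative):

import sqlglot

print(sqlglot.transpile("SELECT APPROX_DISTINCT(user_id) FROM events", read="presto", write="hive")[0])
# expected: SELECT APPROX_COUNT_DISTINCT(user_id) FROM events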
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 
'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def
temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
    # Hive has no temporary storage provider (there are hive settings though)
    return expression
def
parameter_sql(self, expression: sqlglot.expressions.Parameter) -> str:
def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
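A small, hedged sketch of the default branch above, building a Parameter node by hand (the node construction is illustrative; such nodes normally come from the parser, and inside a SET assignment the bare name is emitted instead of the ${...} form):

from sqlglot import exp
from sqlglot.dialects.hive import Hive

# a parameter with an "expression" part renders as ${scope:name}
param = exp.Parameter(this=exp.Var(this="hiveconf"), expression=exp.Var(this="x"))
print(Hive().generate(param))  # expected: ${hiveconf:x}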
def
rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
    serde_props = self.sql(expression, "serde_properties")
    serde_props = f" {serde_props}" if serde_props else ""
    return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
def
datatype_sql(self, expression: sqlglot.expressions.DataType) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
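Sketch of the float-width rule above: a parameterized FLOAT keeps FLOAT up to 32 bits and becomes DOUBLE beyond that. The source dialect "tsql" is chosen here only because FLOAT(n) is idiomatic there, and the expected output is indicative:

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS FLOAT(24)), CAST(y AS FLOAT(53))", read="tsql", write="hive")[0])
# expected something like: SELECT CAST(x AS FLOAT), CAST(y AS DOUBLE)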
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
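Illustrative usage (a sketch; the "safe" results assume Hive normalizes unquoted identifiers case-insensitively):

from sqlglot.dialects.hive import Hive

Hive.Generator.can_identify("Foo", "always")  # True: "always"/True short-circuits
Hive.Generator.can_identify("Foo", True)      # True
Hive.Generator.can_identify("foo")            # expected True: already in normalized case
Hive.Generator.can_identify("Foo", "safe")    # expected False: case would change under normalization (assumption)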
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- TYPED_DIVISION
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITONS
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql