sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part
        diff_sql = f"CAST({diff_sql} AS INT)"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if is_parse_json(this) and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        SUPPORTS_NESTED_CTES = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
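A minimal usage sketch, not part of the module: assuming sqlglot is installed, the dialect is exercised through the top-level transpile API by passing read="hive" and/or write="hive". The table and column names below are placeholders, and the commented output is what Hive.Parser._parse_types and the PERCENTILE mapping above should produce.

import sqlglot

# Parsing with read="hive" goes through Hive.Tokenizer / Hive.Parser;
# generating with write="hive" goes through Hive.Generator.
sql = "SELECT CAST(x AS VARCHAR(10)), PERCENTILE(y, 0.5) FROM t"
print(sqlglot.transpile(sql, read="hive", write="hive")[0])
# The VARCHAR length is dropped outside schema definitions, so this should render as:
# SELECT CAST(x AS STRING), PERCENTILE(y, 0.5) FROM t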
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
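To show how these lookup tables drive generation, here is a hedged sketch that builds expressions directly and renders them with the Hive generator; the column names dt, a and b are placeholders and the commented output is what _add_date_sql and _date_diff_sql above should emit.

from sqlglot import exp

# WEEK maps to ("DATE_ADD", 7), so adding 2 weeks becomes a 14-day DATE_ADD
week_add = exp.DateAdd(this=exp.column("dt"), expression=exp.Literal.number(2), unit=exp.var("WEEK"))
print(week_add.sql(dialect="hive"))   # DATE_ADD(dt, 14)

# HOUR hits TIME_DIFF_FACTOR: a UNIX_TIMESTAMP difference divided by 3600
hour_diff = exp.DateDiff(this=exp.column("a"), expression=exp.column("b"), unit=exp.var("HOUR"))
print(hour_diff.sql(dialect="hive"))  # (UNIX_TIMESTAMP(a) - UNIX_TIMESTAMP(b)) / 3600

# MONTH is in DIFF_MONTH_SWITCH, so MONTHS_BETWEEN is used and truncated to INT
month_diff = exp.DateDiff(this=exp.column("a"), expression=exp.column("b"), unit=exp.var("MONTH"))
print(month_diff.sql(dialect="hive")) # CAST(MONTHS_BETWEEN(a, b) AS INT)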
class Hive(sqlglot.dialects.dialect.Dialect):
(The class source is shown in the module listing above.)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
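A short, hedged sketch of how this mapping is applied: Hive's Java-style format tokens are normalized to strftime-style tokens when a format string is parsed, and converted back through INVERSE_TIME_MAPPING when Hive SQL is generated. The column name dt is a placeholder.

import sqlglot

expr = sqlglot.parse_one("DATE_FORMAT(dt, 'yyyy-MM-dd')", read="hive")
print(expr.text("format"))       # stored internally in strftime notation: %Y-%m-%d
print(expr.sql(dialect="hive"))  # should render as DATE_FORMAT(CAST(dt AS TIMESTAMP), 'yyyy-MM-dd')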
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
(The class source is shown in the module listing above.)
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
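Because "$" is mapped to the PARAMETER token here, Hive-style variable references should survive a parse/generate round trip via Hive.Generator.parameter_sql. A hedged sketch with a made-up variable name:

import sqlglot

print(sqlglot.transpile("SELECT ${hivevar:str_value}", read="hive", write="hive")[0])
# expected to round-trip unchanged: SELECT ${hivevar:str_value}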
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': 
<TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': 
<TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': 
<TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
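The Hive-specific entries at the end of this table (ADD JAR, MSCK REPAIR, and so on) are tokenized as commands, so such statements are carried through as opaque text rather than parsed into a detailed tree. A hedged sketch with made-up object names:

import sqlglot

print(sqlglot.transpile("MSCK REPAIR TABLE db.t", read="hive", write="hive")[0])
# MSCK REPAIR TABLE db.t
print(sqlglot.transpile("ADD JAR s3://bucket/udfs.jar", read="hive", write="hive")[0])
# ADD JAR s3://bucket/udfs.jar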
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

        spark-sql (default)> select cast(1234 as varchar(2));
        23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
        char/varchar type and simply treats them as string type. Please use string type
        directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
        to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

        1234
        Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
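The CHAR/VARCHAR widening described in the docstring above can be observed through the public API. A minimal sketch (exact output may vary slightly between sqlglot versions):

import sqlglot

# Outside of a schema definition, VARCHAR(2) is widened to STRING when parsing as Hive,
# so the length is dropped instead of truncating the value to '12'.
print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])
# Roughly: SELECT CAST(1234 AS STRING)

# Inside a CREATE TABLE schema, the declared length is preserved.
print(sqlglot.transpile("CREATE TABLE t (c VARCHAR(2))", read="hive", write="hive")[0])
# Roughly: CREATE TABLE t (c VARCHAR(2))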
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
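These options are normally supplied through the top-level parsing API rather than by instantiating Hive.Parser directly; a hedged sketch:

import sqlglot
from sqlglot.errors import ErrorLevel

# Parser options such as error_level are forwarded to the dialect's Parser.
tree = sqlglot.parse_one(
    "SELECT COLLECT_LIST(col) FROM tbl",
    read="hive",
    error_level=ErrorLevel.RAISE,
)
print(type(tree))  # a sqlglot.expressions.Select node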
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': 
<function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
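As a small illustration of these Hive-specific mappings, parsing with read="hive" normalizes Hive function names into sqlglot's generic expressions (a sketch; the table and column names are made up):

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT COLLECT_LIST(x), SIZE(arr) FROM t", read="hive")
print(tree.find(exp.ArrayAgg))   # COLLECT_LIST(x) is parsed as an ArrayAgg expression
print(tree.find(exp.ArraySize))  # SIZE(arr) is parsed as an ArraySize expression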
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
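TIME_MAPPING translates Hive's Java-style format tokens into strftime-style tokens when transpiling to other dialects. A hedged sketch (the DuckDB output shown is approximate and may differ by version):

import sqlglot

print(sqlglot.transpile(
    "SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss') FROM t",
    read="hive",
    write="duckdb",
)[0])
# Roughly: SELECT STRFTIME(CAST(ts AS TIMESTAMP), '%Y-%m-%d %H:%M:%S') FROM t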
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- TYPED_DIVISION
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False
    SUPPORTS_NESTED_CTES = False

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
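These options are normally passed through the top-level generation API rather than by constructing Hive.Generator directly; a hedged sketch (the exact ILIKE rewrite may differ by version):

import sqlglot

# pretty and other generator options are forwarded to the dialect's Generator.
sql = sqlglot.transpile(
    "SELECT a AS b FROM t WHERE x ILIKE '%y%'",
    read="postgres",
    write="hive",
    pretty=True,
)[0]
print(sql)  # Hive has no ILIKE, so it is rewritten to a LOWER(...) LIKE ... comparison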
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
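As a hedged illustration of this type mapping when emitting Hive DDL (output is approximate):

import sqlglot

# TEXT becomes STRING, DATETIME becomes TIMESTAMP and VARBINARY becomes BINARY.
print(sqlglot.transpile("CREATE TABLE t (a TEXT, b DATETIME, c VARBINARY)", write="hive")[0])
# Roughly: CREATE TABLE t (a STRING, b TIMESTAMP, c BINARY)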
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 
'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 
'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def
temporary_storage_provider( self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

def
parameter_sql( self, expression: sqlglot.expressions.Parameter) -> str:

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"
def
rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

def
datatype_sql(self, expression: sqlglot.expressions.DataType) -> str:

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)
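The FLOAT branch above chooses single or double precision from the declared size; a hedged sketch (output is approximate):

import sqlglot

# FLOAT(n) with n <= 32 stays FLOAT, larger sizes widen to DOUBLE, and a plain VARCHAR
# with no length is emitted as STRING.
print(sqlglot.transpile("CREATE TABLE t (a FLOAT(24), b FLOAT(53), c VARCHAR)", write="hive")[0])
# Roughly: CREATE TABLE t (a FLOAT, b DOUBLE, c STRING)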
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.

Arguments:
  - text: The text to check.
  - identify:
    "always" or True: Always returns true.
    "safe": True if the identifier is case-insensitive.

Returns:
  Whether or not the given text can be identified.
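For example (a sketch; the "safe" result depends on the dialect's identifier normalization, while "always" is unconditional):

    from sqlglot.dialects.hive import Hive

    Hive.Generator.can_identify("col_a", "safe")    # True: an all-lowercase name is safe for Hive
    Hive.Generator.can_identify("col_a", "always")  # True: "always" never rejects a name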
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- CTE_RECURSIVE_KEYWORD_REQUIRED
- TYPED_DIVISION
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITONS
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql