sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part
        diff_sql = f"CAST({diff_sql} AS INT)"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if is_parse_json(this) and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False
        SUPPORTS_NESTED_CTES = False

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
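The dialect is normally exercised through sqlglot's top-level API rather than instantiated directly. A minimal usage sketch (the queries and target dialects are illustrative, and exact output strings may vary between sqlglot versions):

import sqlglot

# Parse HiveQL into sqlglot's expression tree.
ast = sqlglot.parse_one("SELECT GET_JSON_OBJECT(payload, '$.id') FROM events", read="hive")
print(repr(ast))

# Transpile HiveQL to another dialect: GET_JSON_OBJECT is registered above as
# exp.JSONExtractScalar, so the target dialect renders its own JSON-extraction syntax.
print(sqlglot.transpile("SELECT GET_JSON_OBJECT(payload, '$.id') FROM events", read="hive", write="duckdb")[0])

# And the reverse direction: generate Hive SQL from another dialect's input.
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(user_id) FROM events", read="duckdb", write="hive")[0])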
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
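These three tables drive _add_date_sql and _date_diff_sql above: YEAR, QUARTER and MONTH deltas go through ADD_MONTHS with a month multiplier, WEEK and DAY deltas go through DATE_ADD with a day multiplier, and sub-day differences fall back to UNIX_TIMESTAMP arithmetic scaled by TIME_DIFF_FACTOR. A sketch of the effect (the Snowflake-style inputs are only for illustration, and exact output may vary by sqlglot version):

import sqlglot

# A 2-week delta should be routed through DATE_ADD with the WEEK multiplier (7 days).
print(sqlglot.transpile("SELECT DATEADD(WEEK, 2, dt)", read="snowflake", write="hive")[0])

# A quarter difference should be routed through MONTHS_BETWEEN, truncated, then divided by 3.
print(sqlglot.transpile("SELECT DATEDIFF(QUARTER, d1, d2)", read="snowflake", write="hive")[0])

# A minute difference should use UNIX_TIMESTAMP subtraction with the ' / 60' factor.
print(sqlglot.transpile("SELECT DATEDIFF(MINUTE, t1, t2)", read="snowflake", write="hive")[0])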
class Hive(Dialect):
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
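TIME_MAPPING translates Hive/Spark's Java SimpleDateFormat-style tokens into the strftime-style tokens sqlglot uses internally, and INVERSE_TIME_MAPPING is applied when generating Hive SQL, which is what lets format strings survive transpilation. A sketch (DuckDB is only an example target, and exact output may vary by sqlglot version):

import sqlglot

# 'yyyy-MM-dd HH:mm:ss' should be rewritten to a strftime-style '%Y-%m-%d %H:%M:%S'
# for dialects that use that notation.
print(sqlglot.transpile("SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss')", read="hive", write="duckdb")[0])

# In the other direction, strftime tokens map back to Java-style tokens.
print(sqlglot.transpile("SELECT STRFTIME(ts, '%Y-%m-%d')", read="duckdb", write="hive")[0])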
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
SINGLE_TOKENS =
{'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': 
<TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': 
<TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': 
<TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>}
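Beyond the keyword table, the tokenizer settings cover Hive-specific lexing: backticks quote identifiers, double quotes delimit strings, "$" introduces parameters, and NUMERIC_LITERALS recognizes Hive's typed numeric suffixes (10L, 5S, 2.5BD, ...). A sketch (the target dialect is illustrative and output may vary by sqlglot version):

import sqlglot

# Typed numeric suffixes are picked up by the tokenizer; most targets render them as explicit casts.
print(sqlglot.transpile("SELECT 10L, 2.5BD FROM `my db`.`my table`", read="hive", write="presto")[0])

# Backticks are identifier quotes and double quotes are string quotes in Hive.
print(sqlglot.transpile('SELECT "a string", `col` FROM t', read="hive", write="hive")[0])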
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
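Example (a minimal sketch of parsing Hive SQL with this dialect; the table and column names are made up):

    import sqlglot
    from sqlglot import exp

    # Hive-specific function names such as COLLECT_SET and GET_JSON_OBJECT are
    # mapped onto the generic SetAgg / JSONExtractScalar expressions via FUNCTIONS.
    tree = sqlglot.parse_one(
        "SELECT COLLECT_SET(GET_JSON_OBJECT(payload, '$.id')) FROM events",
        read="hive",
    )
    assert tree.find(exp.SetAgg) is not None
    assert tree.find(exp.JSONExtractScalar) is not None

    # The parsed tree can then be re-rendered for another dialect.
    print(tree.sql(dialect="presto"))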
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': 
<function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
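Example of what TIME_MAPPING buys in practice (a sketch; the exact rendered SQL may vary by sqlglot version and target dialect):

    import sqlglot

    # Hive/Java-style format tokens (yyyy, MM, dd, HH, mm, ss) are rewritten to
    # strftime-style tokens via TIME_MAPPING when leaving the Hive dialect.
    sql = "SELECT DATE_FORMAT(ts, 'yyyy-MM-dd HH:mm:ss') FROM t"
    print(sqlglot.transpile(sql, read="hive", write="duckdb")[0])
    # The format literal is expected to come out as '%Y-%m-%d %H:%M:%S'.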
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
405 class Generator(generator.Generator): 406 LIMIT_FETCH = "LIMIT" 407 TABLESAMPLE_WITH_METHOD = False 408 TABLESAMPLE_SIZE_IS_PERCENT = True 409 JOIN_HINTS = False 410 TABLE_HINTS = False 411 QUERY_HINTS = False 412 INDEX_ON = "ON TABLE" 413 EXTRACT_ALLOWS_QUOTES = False 414 NVL2_SUPPORTED = False 415 SUPPORTS_NESTED_CTES = False 416 417 TYPE_MAPPING = { 418 **generator.Generator.TYPE_MAPPING, 419 exp.DataType.Type.BIT: "BOOLEAN", 420 exp.DataType.Type.DATETIME: "TIMESTAMP", 421 exp.DataType.Type.TEXT: "STRING", 422 exp.DataType.Type.TIME: "TIMESTAMP", 423 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 424 exp.DataType.Type.VARBINARY: "BINARY", 425 } 426 427 TRANSFORMS = { 428 **generator.Generator.TRANSFORMS, 429 exp.Group: transforms.preprocess([transforms.unalias_group]), 430 exp.Select: transforms.preprocess( 431 [ 432 transforms.eliminate_qualify, 433 transforms.eliminate_distinct_on, 434 transforms.unnest_to_explode, 435 ] 436 ), 437 exp.Property: _property_sql, 438 exp.AnyValue: rename_func("FIRST"), 439 exp.ApproxDistinct: approx_count_distinct_sql, 440 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 441 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 442 exp.ArrayConcat: rename_func("CONCAT"), 443 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 444 exp.ArraySize: rename_func("SIZE"), 445 exp.ArraySort: _array_sort_sql, 446 exp.With: no_recursive_cte_sql, 447 exp.DateAdd: _add_date_sql, 448 exp.DateDiff: _date_diff_sql, 449 exp.DateStrToDate: rename_func("TO_DATE"), 450 exp.DateSub: _add_date_sql, 451 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 452 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 453 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 454 exp.FromBase64: rename_func("UNBASE64"), 455 exp.If: if_sql(), 456 exp.ILike: no_ilike_sql, 457 exp.IsNan: rename_func("ISNAN"), 458 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 459 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 460 exp.JSONFormat: _json_format_sql, 461 exp.Left: left_to_substring_sql, 462 exp.Map: var_map_sql, 463 exp.Max: max_or_greatest, 464 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 465 exp.Min: min_or_least, 466 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 467 exp.NotNullColumnConstraint: lambda self, e: "" 468 if e.args.get("allow_null") 469 else "NOT NULL", 470 exp.VarMap: var_map_sql, 471 exp.Create: _create_sql, 472 exp.Quantile: rename_func("PERCENTILE"), 473 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 474 exp.RegexpExtract: regexp_extract_sql, 475 exp.RegexpReplace: regexp_replace_sql, 476 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 477 exp.RegexpSplit: rename_func("SPLIT"), 478 exp.Right: right_to_substring_sql, 479 exp.SafeDivide: no_safe_divide_sql, 480 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 481 exp.SetAgg: rename_func("COLLECT_SET"), 482 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 483 exp.StrPosition: strposition_to_locate_sql, 484 exp.StrToDate: _str_to_date_sql, 485 exp.StrToTime: _str_to_time_sql, 486 exp.StrToUnix: _str_to_unix_sql, 487 exp.StructExtract: struct_extract_sql, 488 exp.TimeStrToDate: rename_func("TO_DATE"), 489 exp.TimeStrToTime: timestrtotime_sql, 490 
exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 491 exp.TimeToStr: _time_to_str, 492 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 493 exp.ToBase64: rename_func("BASE64"), 494 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 495 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 496 exp.TsOrDsToDate: _to_date_sql, 497 exp.TryCast: no_trycast_sql, 498 exp.UnixToStr: lambda self, e: self.func( 499 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 500 ), 501 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 502 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 503 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 504 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 505 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 506 exp.LastDateOfMonth: rename_func("LAST_DAY"), 507 exp.National: lambda self, e: self.national_sql(e, prefix=""), 508 exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 509 exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 510 exp.NotForReplicationColumnConstraint: lambda self, e: "", 511 exp.OnProperty: lambda self, e: "", 512 exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY", 513 } 514 515 PROPERTIES_LOCATION = { 516 **generator.Generator.PROPERTIES_LOCATION, 517 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 518 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 519 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 520 exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, 521 } 522 523 def temporary_storage_provider(self, expression: exp.Create) -> exp.Create: 524 # Hive has no temporary storage provider (there are hive settings though) 525 return expression 526 527 def parameter_sql(self, expression: exp.Parameter) -> str: 528 this = self.sql(expression, "this") 529 expression_sql = self.sql(expression, "expression") 530 531 parent = expression.parent 532 this = f"{this}:{expression_sql}" if expression_sql else this 533 534 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 535 # We need to produce SET key = value instead of SET ${key} = value 536 return this 537 538 return f"${{{this}}}" 539 540 def schema_sql(self, expression: exp.Schema) -> str: 541 for ordered in expression.find_all(exp.Ordered): 542 if ordered.args.get("desc") is False: 543 ordered.set("desc", None) 544 545 return super().schema_sql(expression) 546 547 def constraint_sql(self, expression: exp.Constraint) -> str: 548 for prop in list(expression.find_all(exp.Properties)): 549 prop.pop() 550 551 this = self.sql(expression, "this") 552 expressions = self.expressions(expression, sep=" ", flat=True) 553 return f"CONSTRAINT {this} {expressions}" 554 555 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 556 serde_props = self.sql(expression, "serde_properties") 557 serde_props = f" {serde_props}" if serde_props else "" 558 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 559 560 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 561 return self.func( 562 "COLLECT_LIST", 563 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 564 ) 565 566 def with_properties(self, properties: exp.Properties) -> str: 567 return self.properties(properties, 
prefix=self.seg("TBLPROPERTIES")) 568 569 def datatype_sql(self, expression: exp.DataType) -> str: 570 if ( 571 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 572 and not expression.expressions 573 ): 574 expression = exp.DataType.build("text") 575 elif expression.this in exp.DataType.TEMPORAL_TYPES: 576 expression = exp.DataType.build(expression.this) 577 elif expression.is_type("float"): 578 size_expression = expression.find(exp.DataTypeParam) 579 if size_expression: 580 size = int(size_expression.name) 581 expression = ( 582 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 583 ) 584 585 return super().datatype_sql(expression) 586 587 def version_sql(self, expression: exp.Version) -> str: 588 sql = super().version_sql(expression) 589 return sql.replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values: False (default): never quote, except where the dialect makes quoting mandatory; True or 'always': always quote; 'safe': only quote identifiers that are case-insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values: "upper" or True (default): convert names to uppercase; "lower": convert names to lowercase; False: disable function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
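Example (a hedged sketch of driving this generator through sqlglot.transpile; the identifiers are made up and the exact output may differ between versions):

    import sqlglot

    # Generator options such as pretty, identify and normalize_functions are
    # forwarded as keyword arguments by sqlglot.transpile / Expression.sql.
    sql = "SELECT ARRAY_SIZE(tags) AS n FROM events WHERE name ILIKE '%foo%'"
    print(sqlglot.transpile(sql, write="hive", pretty=True)[0])
    # ARRAY_SIZE should be rendered as SIZE(...) and ILIKE rewritten via LOWER/LIKE,
    # since Hive supports neither form directly.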
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 
'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 
'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
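Example (a sketch of how the POST_SCHEMA locations above place Hive DDL properties; exact formatting may vary):

    import sqlglot

    # PARTITIONED BY and STORED AS are emitted after the column schema, matching
    # the PartitionedByProperty / FileFormatProperty locations listed above.
    ddl = "CREATE TABLE db.t (a INT, b STRING) PARTITIONED BY (ds STRING) STORED AS PARQUET"
    print(sqlglot.transpile(ddl, read="hive", write="hive")[0])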
def temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:

def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"

def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:

def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
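Example (a sketch of the datatype rewrites in datatype_sql; the output shown is the expected shape, not a guarantee across versions):

    import sqlglot

    # An unparameterized VARCHAR becomes STRING, and FLOAT(53) (size > 32) becomes
    # DOUBLE when the Hive generator renders the types.
    sql = "SELECT CAST(a AS VARCHAR), CAST(b AS FLOAT(53)) FROM t"
    print(sqlglot.transpile(sql, write="hive")[0])
    # Expected along the lines of: SELECT CAST(a AS STRING), CAST(b AS DOUBLE) FROM t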
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
279 @classmethod 280 def can_identify(cls, text: str, identify: str | bool = "safe") -> bool: 281 """Checks if text can be identified given an identify option. 282 283 Args: 284 text: The text to check. 285 identify: 286 "always" or `True`: Always returns true. 287 "safe": True if the identifier is case-insensitive. 288 289 Returns: 290 Whether or not the given text can be identified. 291 """ 292 if identify is True or identify == "always": 293 return True 294 295 if identify == "safe": 296 return not cls.case_sensitive(text) 297 298 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or
True
: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
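Example (an assumed usage sketch of can_identify; only the "always"/True branches have a fixed answer here, the "safe" branch depends on case sensitivity):

    from sqlglot.dialects.hive import Hive

    # "always" and True short-circuit to True regardless of the text.
    assert Hive.Generator.can_identify("events", identify="always")
    assert Hive.Generator.can_identify("events", identify=True)

    # "safe" returns True only when the identifier is case-insensitive for the dialect.
    print(Hive.Generator.can_identify("Events", identify="safe"))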
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- CTE_RECURSIVE_KEYWORD_REQUIRED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql