"""Hive dialect definitions for sqlglot (module ``sqlglot.dialects.hive``)."""
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 approx_count_distinct_sql, 9 arg_max_or_min_no_count, 10 create_with_partitions_sql, 11 format_time_lambda, 12 if_sql, 13 is_parse_json, 14 left_to_substring_sql, 15 locate_to_strposition, 16 max_or_greatest, 17 min_or_least, 18 no_ilike_sql, 19 no_recursive_cte_sql, 20 no_safe_divide_sql, 21 no_trycast_sql, 22 regexp_extract_sql, 23 regexp_replace_sql, 24 rename_func, 25 right_to_substring_sql, 26 strposition_to_locate_sql, 27 struct_extract_sql, 28 time_format, 29 timestrtotime_sql, 30 var_map_sql, 31) 32from sqlglot.helper import seq_get 33from sqlglot.parser import parse_var_map 34from sqlglot.tokens import TokenType 35 36# (FuncType, Multiplier) 37DATE_DELTA_INTERVAL = { 38 "YEAR": ("ADD_MONTHS", 12), 39 "MONTH": ("ADD_MONTHS", 1), 40 "QUARTER": ("ADD_MONTHS", 3), 41 "WEEK": ("DATE_ADD", 7), 42 "DAY": ("DATE_ADD", 1), 43} 44 45TIME_DIFF_FACTOR = { 46 "MILLISECOND": " * 1000", 47 "SECOND": "", 48 "MINUTE": " / 60", 49 "HOUR": " / 3600", 50} 51 52DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH") 53 54 55def _create_sql(self, expression: exp.Create) -> str: 56 expression = expression.copy() 57 58 # remove UNIQUE column constraints 59 for constraint in expression.find_all(exp.UniqueColumnConstraint): 60 if constraint.parent: 61 constraint.parent.pop() 62 63 properties = expression.args.get("properties") 64 temporary = any( 65 isinstance(prop, exp.TemporaryProperty) 66 for prop in (properties.expressions if properties else []) 67 ) 68 69 # CTAS with temp tables map to CREATE TEMPORARY VIEW 70 kind = expression.args["kind"] 71 if kind.upper() == "TABLE" and temporary: 72 if expression.expression: 73 return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}" 74 else: 75 # CREATE TEMPORARY TABLE may require storage provider 76 expression = 
self.temporary_storage_provider(expression) 77 78 return create_with_partitions_sql(self, expression) 79 80 81def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str: 82 unit = expression.text("unit").upper() 83 func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1)) 84 85 if isinstance(expression, exp.DateSub): 86 multiplier *= -1 87 88 if expression.expression.is_number: 89 modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier) 90 else: 91 modified_increment = expression.expression.copy() 92 if multiplier != 1: 93 modified_increment = exp.Mul( # type: ignore 94 this=modified_increment, expression=exp.Literal.number(multiplier) 95 ) 96 97 return self.func(func, expression.this, modified_increment) 98 99 100def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str: 101 unit = expression.text("unit").upper() 102 103 factor = TIME_DIFF_FACTOR.get(unit) 104 if factor is not None: 105 left = self.sql(expression, "this") 106 right = self.sql(expression, "expression") 107 sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})" 108 return f"({sec_diff}){factor}" if factor else sec_diff 109 110 months_between = unit in DIFF_MONTH_SWITCH 111 sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF" 112 _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1)) 113 multiplier_sql = f" / {multiplier}" if multiplier > 1 else "" 114 diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})" 115 116 if months_between: 117 # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part 118 diff_sql = f"CAST({diff_sql} AS INT)" 119 120 return f"{diff_sql}{multiplier_sql}" 121 122 123def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str: 124 this = expression.this 125 if is_parse_json(this) and this.this.is_string: 126 # Since FROM_JSON requires a nested type, we always wrap the json string with 127 # an array to ensure that 
"naked" strings like "'a'" will be handled correctly 128 wrapped_json = exp.Literal.string(f"[{this.this.name}]") 129 130 from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)) 131 to_json = self.func("TO_JSON", from_json) 132 133 # This strips the [, ] delimiters of the dummy array printed by TO_JSON 134 return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1") 135 136 return self.func("TO_JSON", this, expression.args.get("options")) 137 138 139def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str: 140 if expression.expression: 141 self.unsupported("Hive SORT_ARRAY does not support a comparator") 142 return f"SORT_ARRAY({self.sql(expression, 'this')})" 143 144 145def _property_sql(self: Hive.Generator, expression: exp.Property) -> str: 146 return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}" 147 148 149def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str: 150 return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression)) 151 152 153def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str: 154 this = self.sql(expression, "this") 155 time_format = self.format_time(expression) 156 if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 157 this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))" 158 return f"CAST({this} AS DATE)" 159 160 161def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str: 162 this = self.sql(expression, "this") 163 time_format = self.format_time(expression) 164 if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 165 this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))" 166 return f"CAST({this} AS TIMESTAMP)" 167 168 169def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str: 170 this = self.sql(expression, "this") 171 time_format = self.format_time(expression) 172 return f"DATE_FORMAT({this}, 
{time_format})" 173 174 175def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str: 176 this = self.sql(expression, "this") 177 time_format = self.format_time(expression) 178 if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 179 return f"TO_DATE({this}, {time_format})" 180 return f"TO_DATE({this})" 181 182 183class Hive(Dialect): 184 ALIAS_POST_TABLESAMPLE = True 185 IDENTIFIERS_CAN_START_WITH_DIGIT = True 186 SUPPORTS_USER_DEFINED_TYPES = False 187 188 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 189 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 190 191 TIME_MAPPING = { 192 "y": "%Y", 193 "Y": "%Y", 194 "YYYY": "%Y", 195 "yyyy": "%Y", 196 "YY": "%y", 197 "yy": "%y", 198 "MMMM": "%B", 199 "MMM": "%b", 200 "MM": "%m", 201 "M": "%-m", 202 "dd": "%d", 203 "d": "%-d", 204 "HH": "%H", 205 "H": "%-H", 206 "hh": "%I", 207 "h": "%-I", 208 "mm": "%M", 209 "m": "%-M", 210 "ss": "%S", 211 "s": "%-S", 212 "SSSSSS": "%f", 213 "a": "%p", 214 "DD": "%j", 215 "D": "%-j", 216 "E": "%a", 217 "EE": "%a", 218 "EEE": "%a", 219 "EEEE": "%A", 220 } 221 222 DATE_FORMAT = "'yyyy-MM-dd'" 223 DATEINT_FORMAT = "'yyyyMMdd'" 224 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 225 226 class Tokenizer(tokens.Tokenizer): 227 QUOTES = ["'", '"'] 228 IDENTIFIERS = ["`"] 229 STRING_ESCAPES = ["\\"] 230 ENCODE = "utf-8" 231 232 KEYWORDS = { 233 **tokens.Tokenizer.KEYWORDS, 234 "ADD ARCHIVE": TokenType.COMMAND, 235 "ADD ARCHIVES": TokenType.COMMAND, 236 "ADD FILE": TokenType.COMMAND, 237 "ADD FILES": TokenType.COMMAND, 238 "ADD JAR": TokenType.COMMAND, 239 "ADD JARS": TokenType.COMMAND, 240 "MSCK REPAIR": TokenType.COMMAND, 241 "REFRESH": TokenType.COMMAND, 242 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 243 "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, 244 "VERSION AS OF": TokenType.VERSION_SNAPSHOT, 245 } 246 247 NUMERIC_LITERALS = { 248 "L": "BIGINT", 249 "S": "SMALLINT", 250 "Y": "TINYINT", 251 "D": "DOUBLE", 252 "F": "FLOAT", 
253 "BD": "DECIMAL", 254 } 255 256 class Parser(parser.Parser): 257 LOG_DEFAULTS_TO_LN = True 258 STRICT_CAST = False 259 260 FUNCTIONS = { 261 **parser.Parser.FUNCTIONS, 262 "BASE64": exp.ToBase64.from_arg_list, 263 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 264 "COLLECT_SET": exp.SetAgg.from_arg_list, 265 "DATE_ADD": lambda args: exp.TsOrDsAdd( 266 this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 267 ), 268 "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 269 [ 270 exp.TimeStrToTime(this=seq_get(args, 0)), 271 seq_get(args, 1), 272 ] 273 ), 274 "DATE_SUB": lambda args: exp.TsOrDsAdd( 275 this=seq_get(args, 0), 276 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 277 unit=exp.Literal.string("DAY"), 278 ), 279 "DATEDIFF": lambda args: exp.DateDiff( 280 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 281 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 282 ), 283 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 284 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 285 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 286 "LOCATE": locate_to_strposition, 287 "MAP": parse_var_map, 288 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 289 "PERCENTILE": exp.Quantile.from_arg_list, 290 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 291 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 292 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 293 ), 294 "SIZE": exp.ArraySize.from_arg_list, 295 "SPLIT": exp.RegexpSplit.from_arg_list, 296 "STR_TO_MAP": lambda args: exp.StrToMap( 297 this=seq_get(args, 0), 298 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 299 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 300 ), 301 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 302 "TO_JSON": exp.JSONFormat.from_arg_list, 303 "UNBASE64": exp.FromBase64.from_arg_list, 304 
"UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 305 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 306 } 307 308 NO_PAREN_FUNCTION_PARSERS = { 309 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 310 "TRANSFORM": lambda self: self._parse_transform(), 311 } 312 313 PROPERTY_PARSERS = { 314 **parser.Parser.PROPERTY_PARSERS, 315 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 316 expressions=self._parse_wrapped_csv(self._parse_property) 317 ), 318 } 319 320 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 321 if not self._match(TokenType.L_PAREN, advance=False): 322 self._retreat(self._index - 1) 323 return None 324 325 args = self._parse_wrapped_csv(self._parse_lambda) 326 row_format_before = self._parse_row_format(match_row=True) 327 328 record_writer = None 329 if self._match_text_seq("RECORDWRITER"): 330 record_writer = self._parse_string() 331 332 if not self._match(TokenType.USING): 333 return exp.Transform.from_arg_list(args) 334 335 command_script = self._parse_string() 336 337 self._match(TokenType.ALIAS) 338 schema = self._parse_schema() 339 340 row_format_after = self._parse_row_format(match_row=True) 341 record_reader = None 342 if self._match_text_seq("RECORDREADER"): 343 record_reader = self._parse_string() 344 345 return self.expression( 346 exp.QueryTransform, 347 expressions=args, 348 command_script=command_script, 349 schema=schema, 350 row_format_before=row_format_before, 351 record_writer=record_writer, 352 row_format_after=row_format_after, 353 record_reader=record_reader, 354 ) 355 356 def _parse_types( 357 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 358 ) -> t.Optional[exp.Expression]: 359 """ 360 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 361 STRING in all contexts except for schema definitions. 
For example, this is in Spark v3.4.0: 362 363 spark-sql (default)> select cast(1234 as varchar(2)); 364 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 365 char/varchar type and simply treats them as string type. Please use string type 366 directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString 367 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 368 369 1234 370 Time taken: 4.265 seconds, Fetched 1 row(s) 371 372 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 373 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 374 375 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 376 """ 377 this = super()._parse_types( 378 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 379 ) 380 381 if this and not schema: 382 return this.transform( 383 lambda node: node.replace(exp.DataType.build("text")) 384 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 385 else node, 386 copy=False, 387 ) 388 389 return this 390 391 def _parse_partition_and_order( 392 self, 393 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 394 return ( 395 self._parse_csv(self._parse_conjunction) 396 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 397 else [], 398 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 399 ) 400 401 class Generator(generator.Generator): 402 LIMIT_FETCH = "LIMIT" 403 TABLESAMPLE_WITH_METHOD = False 404 TABLESAMPLE_SIZE_IS_PERCENT = True 405 JOIN_HINTS = False 406 TABLE_HINTS = False 407 QUERY_HINTS = False 408 INDEX_ON = "ON TABLE" 409 EXTRACT_ALLOWS_QUOTES = False 410 NVL2_SUPPORTED = False 411 412 TYPE_MAPPING = { 413 **generator.Generator.TYPE_MAPPING, 414 exp.DataType.Type.BIT: "BOOLEAN", 415 exp.DataType.Type.DATETIME: "TIMESTAMP", 416 exp.DataType.Type.TEXT: "STRING", 417 
exp.DataType.Type.TIME: "TIMESTAMP", 418 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 419 exp.DataType.Type.VARBINARY: "BINARY", 420 } 421 422 TRANSFORMS = { 423 **generator.Generator.TRANSFORMS, 424 exp.Group: transforms.preprocess([transforms.unalias_group]), 425 exp.Select: transforms.preprocess( 426 [ 427 transforms.eliminate_qualify, 428 transforms.eliminate_distinct_on, 429 transforms.unnest_to_explode, 430 ] 431 ), 432 exp.Property: _property_sql, 433 exp.AnyValue: rename_func("FIRST"), 434 exp.ApproxDistinct: approx_count_distinct_sql, 435 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 436 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 437 exp.ArrayConcat: rename_func("CONCAT"), 438 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 439 exp.ArraySize: rename_func("SIZE"), 440 exp.ArraySort: _array_sort_sql, 441 exp.With: no_recursive_cte_sql, 442 exp.DateAdd: _add_date_sql, 443 exp.DateDiff: _date_diff_sql, 444 exp.DateStrToDate: rename_func("TO_DATE"), 445 exp.DateSub: _add_date_sql, 446 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 447 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 448 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 449 exp.FromBase64: rename_func("UNBASE64"), 450 exp.If: if_sql(), 451 exp.ILike: no_ilike_sql, 452 exp.IsNan: rename_func("ISNAN"), 453 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 454 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 455 exp.JSONFormat: _json_format_sql, 456 exp.Left: left_to_substring_sql, 457 exp.Map: var_map_sql, 458 exp.Max: max_or_greatest, 459 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 460 exp.Min: min_or_least, 461 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 462 
exp.NotNullColumnConstraint: lambda self, e: "" 463 if e.args.get("allow_null") 464 else "NOT NULL", 465 exp.VarMap: var_map_sql, 466 exp.Create: _create_sql, 467 exp.Quantile: rename_func("PERCENTILE"), 468 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 469 exp.RegexpExtract: regexp_extract_sql, 470 exp.RegexpReplace: regexp_replace_sql, 471 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 472 exp.RegexpSplit: rename_func("SPLIT"), 473 exp.Right: right_to_substring_sql, 474 exp.SafeDivide: no_safe_divide_sql, 475 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 476 exp.SetAgg: rename_func("COLLECT_SET"), 477 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 478 exp.StrPosition: strposition_to_locate_sql, 479 exp.StrToDate: _str_to_date_sql, 480 exp.StrToTime: _str_to_time_sql, 481 exp.StrToUnix: _str_to_unix_sql, 482 exp.StructExtract: struct_extract_sql, 483 exp.TimeStrToDate: rename_func("TO_DATE"), 484 exp.TimeStrToTime: timestrtotime_sql, 485 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 486 exp.TimeToStr: _time_to_str, 487 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 488 exp.ToBase64: rename_func("BASE64"), 489 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 490 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 491 exp.TsOrDsToDate: _to_date_sql, 492 exp.TryCast: no_trycast_sql, 493 exp.UnixToStr: lambda self, e: self.func( 494 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 495 ), 496 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 497 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 498 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 499 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 500 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 501 exp.LastDateOfMonth: 
rename_func("LAST_DAY"), 502 exp.National: lambda self, e: self.national_sql(e, prefix=""), 503 exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 504 exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 505 exp.NotForReplicationColumnConstraint: lambda self, e: "", 506 exp.OnProperty: lambda self, e: "", 507 exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY", 508 } 509 510 PROPERTIES_LOCATION = { 511 **generator.Generator.PROPERTIES_LOCATION, 512 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 513 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 514 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 515 exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, 516 } 517 518 def temporary_storage_provider(self, expression: exp.Create) -> exp.Create: 519 # Hive has no temporary storage provider (there are hive settings though) 520 return expression 521 522 def parameter_sql(self, expression: exp.Parameter) -> str: 523 this = self.sql(expression, "this") 524 parent = expression.parent 525 526 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 527 # We need to produce SET key = value instead of SET ${key} = value 528 return this 529 530 return f"${{{this}}}" 531 532 def schema_sql(self, expression: exp.Schema) -> str: 533 expression = expression.copy() 534 535 for ordered in expression.find_all(exp.Ordered): 536 if ordered.args.get("desc") is False: 537 ordered.set("desc", None) 538 539 return super().schema_sql(expression) 540 541 def constraint_sql(self, expression: exp.Constraint) -> str: 542 expression = expression.copy() 543 544 for prop in list(expression.find_all(exp.Properties)): 545 prop.pop() 546 547 this = self.sql(expression, "this") 548 expressions = self.expressions(expression, sep=" ", flat=True) 549 return f"CONSTRAINT {this} {expressions}" 550 551 def rowformatserdeproperty_sql(self, 
expression: exp.RowFormatSerdeProperty) -> str: 552 serde_props = self.sql(expression, "serde_properties") 553 serde_props = f" {serde_props}" if serde_props else "" 554 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 555 556 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 557 return self.func( 558 "COLLECT_LIST", 559 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 560 ) 561 562 def with_properties(self, properties: exp.Properties) -> str: 563 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 564 565 def datatype_sql(self, expression: exp.DataType) -> str: 566 if ( 567 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 568 and not expression.expressions 569 ): 570 expression = exp.DataType.build("text") 571 elif expression.this in exp.DataType.TEMPORAL_TYPES: 572 expression = exp.DataType.build(expression.this) 573 elif expression.is_type("float"): 574 size_expression = expression.find(exp.DataTypeParam) 575 if size_expression: 576 size = int(size_expression.name) 577 expression = ( 578 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 579 ) 580 581 return super().datatype_sql(expression) 582 583 def version_sql(self, expression: exp.Version) -> str: 584 sql = super().version_sql(expression) 585 return sql.replace("FOR ", "", 1)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
184class Hive(Dialect): 185 ALIAS_POST_TABLESAMPLE = True 186 IDENTIFIERS_CAN_START_WITH_DIGIT = True 187 SUPPORTS_USER_DEFINED_TYPES = False 188 189 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 190 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 191 192 TIME_MAPPING = { 193 "y": "%Y", 194 "Y": "%Y", 195 "YYYY": "%Y", 196 "yyyy": "%Y", 197 "YY": "%y", 198 "yy": "%y", 199 "MMMM": "%B", 200 "MMM": "%b", 201 "MM": "%m", 202 "M": "%-m", 203 "dd": "%d", 204 "d": "%-d", 205 "HH": "%H", 206 "H": "%-H", 207 "hh": "%I", 208 "h": "%-I", 209 "mm": "%M", 210 "m": "%-M", 211 "ss": "%S", 212 "s": "%-S", 213 "SSSSSS": "%f", 214 "a": "%p", 215 "DD": "%j", 216 "D": "%-j", 217 "E": "%a", 218 "EE": "%a", 219 "EEE": "%a", 220 "EEEE": "%A", 221 } 222 223 DATE_FORMAT = "'yyyy-MM-dd'" 224 DATEINT_FORMAT = "'yyyyMMdd'" 225 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 226 227 class Tokenizer(tokens.Tokenizer): 228 QUOTES = ["'", '"'] 229 IDENTIFIERS = ["`"] 230 STRING_ESCAPES = ["\\"] 231 ENCODE = "utf-8" 232 233 KEYWORDS = { 234 **tokens.Tokenizer.KEYWORDS, 235 "ADD ARCHIVE": TokenType.COMMAND, 236 "ADD ARCHIVES": TokenType.COMMAND, 237 "ADD FILE": TokenType.COMMAND, 238 "ADD FILES": TokenType.COMMAND, 239 "ADD JAR": TokenType.COMMAND, 240 "ADD JARS": TokenType.COMMAND, 241 "MSCK REPAIR": TokenType.COMMAND, 242 "REFRESH": TokenType.COMMAND, 243 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 244 "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT, 245 "VERSION AS OF": TokenType.VERSION_SNAPSHOT, 246 } 247 248 NUMERIC_LITERALS = { 249 "L": "BIGINT", 250 "S": "SMALLINT", 251 "Y": "TINYINT", 252 "D": "DOUBLE", 253 "F": "FLOAT", 254 "BD": "DECIMAL", 255 } 256 257 class Parser(parser.Parser): 258 LOG_DEFAULTS_TO_LN = True 259 STRICT_CAST = False 260 261 FUNCTIONS = { 262 **parser.Parser.FUNCTIONS, 263 "BASE64": exp.ToBase64.from_arg_list, 264 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 265 "COLLECT_SET": exp.SetAgg.from_arg_list, 266 "DATE_ADD": lambda args: exp.TsOrDsAdd( 267 
this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 268 ), 269 "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 270 [ 271 exp.TimeStrToTime(this=seq_get(args, 0)), 272 seq_get(args, 1), 273 ] 274 ), 275 "DATE_SUB": lambda args: exp.TsOrDsAdd( 276 this=seq_get(args, 0), 277 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 278 unit=exp.Literal.string("DAY"), 279 ), 280 "DATEDIFF": lambda args: exp.DateDiff( 281 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 282 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 283 ), 284 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 285 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 286 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 287 "LOCATE": locate_to_strposition, 288 "MAP": parse_var_map, 289 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 290 "PERCENTILE": exp.Quantile.from_arg_list, 291 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 292 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 293 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 294 ), 295 "SIZE": exp.ArraySize.from_arg_list, 296 "SPLIT": exp.RegexpSplit.from_arg_list, 297 "STR_TO_MAP": lambda args: exp.StrToMap( 298 this=seq_get(args, 0), 299 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 300 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 301 ), 302 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 303 "TO_JSON": exp.JSONFormat.from_arg_list, 304 "UNBASE64": exp.FromBase64.from_arg_list, 305 "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 306 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 307 } 308 309 NO_PAREN_FUNCTION_PARSERS = { 310 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 311 "TRANSFORM": lambda self: self._parse_transform(), 312 } 313 314 PROPERTY_PARSERS = { 315 
**parser.Parser.PROPERTY_PARSERS, 316 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 317 expressions=self._parse_wrapped_csv(self._parse_property) 318 ), 319 } 320 321 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 322 if not self._match(TokenType.L_PAREN, advance=False): 323 self._retreat(self._index - 1) 324 return None 325 326 args = self._parse_wrapped_csv(self._parse_lambda) 327 row_format_before = self._parse_row_format(match_row=True) 328 329 record_writer = None 330 if self._match_text_seq("RECORDWRITER"): 331 record_writer = self._parse_string() 332 333 if not self._match(TokenType.USING): 334 return exp.Transform.from_arg_list(args) 335 336 command_script = self._parse_string() 337 338 self._match(TokenType.ALIAS) 339 schema = self._parse_schema() 340 341 row_format_after = self._parse_row_format(match_row=True) 342 record_reader = None 343 if self._match_text_seq("RECORDREADER"): 344 record_reader = self._parse_string() 345 346 return self.expression( 347 exp.QueryTransform, 348 expressions=args, 349 command_script=command_script, 350 schema=schema, 351 row_format_before=row_format_before, 352 record_writer=record_writer, 353 row_format_after=row_format_after, 354 record_reader=record_reader, 355 ) 356 357 def _parse_types( 358 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 359 ) -> t.Optional[exp.Expression]: 360 """ 361 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 362 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 363 364 spark-sql (default)> select cast(1234 as varchar(2)); 365 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 366 char/varchar type and simply treats them as string type. Please use string type 367 directly to avoid confusion. 
Otherwise, you can set spark.sql.legacy.charVarcharAsString 368 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 369 370 1234 371 Time taken: 4.265 seconds, Fetched 1 row(s) 372 373 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 374 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 375 376 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 377 """ 378 this = super()._parse_types( 379 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 380 ) 381 382 if this and not schema: 383 return this.transform( 384 lambda node: node.replace(exp.DataType.build("text")) 385 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 386 else node, 387 copy=False, 388 ) 389 390 return this 391 392 def _parse_partition_and_order( 393 self, 394 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 395 return ( 396 self._parse_csv(self._parse_conjunction) 397 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 398 else [], 399 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 400 ) 401 402 class Generator(generator.Generator): 403 LIMIT_FETCH = "LIMIT" 404 TABLESAMPLE_WITH_METHOD = False 405 TABLESAMPLE_SIZE_IS_PERCENT = True 406 JOIN_HINTS = False 407 TABLE_HINTS = False 408 QUERY_HINTS = False 409 INDEX_ON = "ON TABLE" 410 EXTRACT_ALLOWS_QUOTES = False 411 NVL2_SUPPORTED = False 412 413 TYPE_MAPPING = { 414 **generator.Generator.TYPE_MAPPING, 415 exp.DataType.Type.BIT: "BOOLEAN", 416 exp.DataType.Type.DATETIME: "TIMESTAMP", 417 exp.DataType.Type.TEXT: "STRING", 418 exp.DataType.Type.TIME: "TIMESTAMP", 419 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 420 exp.DataType.Type.VARBINARY: "BINARY", 421 } 422 423 TRANSFORMS = { 424 **generator.Generator.TRANSFORMS, 425 exp.Group: transforms.preprocess([transforms.unalias_group]), 426 exp.Select: transforms.preprocess( 427 [ 
428 transforms.eliminate_qualify, 429 transforms.eliminate_distinct_on, 430 transforms.unnest_to_explode, 431 ] 432 ), 433 exp.Property: _property_sql, 434 exp.AnyValue: rename_func("FIRST"), 435 exp.ApproxDistinct: approx_count_distinct_sql, 436 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 437 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 438 exp.ArrayConcat: rename_func("CONCAT"), 439 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 440 exp.ArraySize: rename_func("SIZE"), 441 exp.ArraySort: _array_sort_sql, 442 exp.With: no_recursive_cte_sql, 443 exp.DateAdd: _add_date_sql, 444 exp.DateDiff: _date_diff_sql, 445 exp.DateStrToDate: rename_func("TO_DATE"), 446 exp.DateSub: _add_date_sql, 447 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 448 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 449 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 450 exp.FromBase64: rename_func("UNBASE64"), 451 exp.If: if_sql(), 452 exp.ILike: no_ilike_sql, 453 exp.IsNan: rename_func("ISNAN"), 454 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 455 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 456 exp.JSONFormat: _json_format_sql, 457 exp.Left: left_to_substring_sql, 458 exp.Map: var_map_sql, 459 exp.Max: max_or_greatest, 460 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 461 exp.Min: min_or_least, 462 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 463 exp.NotNullColumnConstraint: lambda self, e: "" 464 if e.args.get("allow_null") 465 else "NOT NULL", 466 exp.VarMap: var_map_sql, 467 exp.Create: _create_sql, 468 exp.Quantile: rename_func("PERCENTILE"), 469 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 470 exp.RegexpExtract: regexp_extract_sql, 471 exp.RegexpReplace: 
regexp_replace_sql, 472 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 473 exp.RegexpSplit: rename_func("SPLIT"), 474 exp.Right: right_to_substring_sql, 475 exp.SafeDivide: no_safe_divide_sql, 476 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 477 exp.SetAgg: rename_func("COLLECT_SET"), 478 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 479 exp.StrPosition: strposition_to_locate_sql, 480 exp.StrToDate: _str_to_date_sql, 481 exp.StrToTime: _str_to_time_sql, 482 exp.StrToUnix: _str_to_unix_sql, 483 exp.StructExtract: struct_extract_sql, 484 exp.TimeStrToDate: rename_func("TO_DATE"), 485 exp.TimeStrToTime: timestrtotime_sql, 486 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 487 exp.TimeToStr: _time_to_str, 488 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 489 exp.ToBase64: rename_func("BASE64"), 490 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 491 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 492 exp.TsOrDsToDate: _to_date_sql, 493 exp.TryCast: no_trycast_sql, 494 exp.UnixToStr: lambda self, e: self.func( 495 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 496 ), 497 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 498 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 499 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 500 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 501 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 502 exp.LastDateOfMonth: rename_func("LAST_DAY"), 503 exp.National: lambda self, e: self.national_sql(e, prefix=""), 504 exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 505 exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})", 506 exp.NotForReplicationColumnConstraint: 
lambda self, e: "", 507 exp.OnProperty: lambda self, e: "", 508 exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY", 509 } 510 511 PROPERTIES_LOCATION = { 512 **generator.Generator.PROPERTIES_LOCATION, 513 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 514 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 515 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 516 exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED, 517 } 518 519 def temporary_storage_provider(self, expression: exp.Create) -> exp.Create: 520 # Hive has no temporary storage provider (there are hive settings though) 521 return expression 522 523 def parameter_sql(self, expression: exp.Parameter) -> str: 524 this = self.sql(expression, "this") 525 parent = expression.parent 526 527 if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem): 528 # We need to produce SET key = value instead of SET ${key} = value 529 return this 530 531 return f"${{{this}}}" 532 533 def schema_sql(self, expression: exp.Schema) -> str: 534 expression = expression.copy() 535 536 for ordered in expression.find_all(exp.Ordered): 537 if ordered.args.get("desc") is False: 538 ordered.set("desc", None) 539 540 return super().schema_sql(expression) 541 542 def constraint_sql(self, expression: exp.Constraint) -> str: 543 expression = expression.copy() 544 545 for prop in list(expression.find_all(exp.Properties)): 546 prop.pop() 547 548 this = self.sql(expression, "this") 549 expressions = self.expressions(expression, sep=" ", flat=True) 550 return f"CONSTRAINT {this} {expressions}" 551 552 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 553 serde_props = self.sql(expression, "serde_properties") 554 serde_props = f" {serde_props}" if serde_props else "" 555 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 556 557 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 558 return self.func( 559 
"COLLECT_LIST", 560 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 561 ) 562 563 def with_properties(self, properties: exp.Properties) -> str: 564 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 565 566 def datatype_sql(self, expression: exp.DataType) -> str: 567 if ( 568 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 569 and not expression.expressions 570 ): 571 expression = exp.DataType.build("text") 572 elif expression.this in exp.DataType.TEMPORAL_TYPES: 573 expression = exp.DataType.build(expression.this) 574 elif expression.is_type("float"): 575 size_expression = expression.find(exp.DataTypeParam) 576 if size_expression: 577 size = int(size_expression.name) 578 expression = ( 579 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 580 ) 581 582 return super().datatype_sql(expression) 583 584 def version_sql(self, expression: exp.Version) -> str: 585 sql = super().version_sql(expression) 586 return sql.replace("FOR ", "", 1)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    # Hive string literals accept both single and double quotes; identifiers
    # are quoted with backticks and strings use backslash escaping.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # Hive session statements that sqlglot treats as opaque commands
        # rather than parsing their bodies.
        "ADD ARCHIVE": TokenType.COMMAND,
        "ADD ARCHIVES": TokenType.COMMAND,
        "ADD FILE": TokenType.COMMAND,
        "ADD FILES": TokenType.COMMAND,
        "ADD JAR": TokenType.COMMAND,
        "ADD JARS": TokenType.COMMAND,
        "MSCK REPAIR": TokenType.COMMAND,
        "REFRESH": TokenType.COMMAND,
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        # Time-travel clauses (e.g. Delta/Iceberg style snapshots).
        "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
        "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
    }

    # Suffixes on numeric literals and the Hive type each denotes,
    # e.g. 10L -> BIGINT, 1.5BD -> DECIMAL.
    NUMERIC_LITERALS = {
        "L": "BIGINT",
        "S": "SMALLINT",
        "Y": "TINYINT",
        "D": "DOUBLE",
        "F": "FLOAT",
        "BD": "DECIMAL",
    }
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': 
<TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 
'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 
'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': 
<TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': 
<TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>}
class Parser(parser.Parser):
    # LOG(x) defaults to natural log in Hive; casts are lenient.
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # Hive's DATE_ADD/DATE_SUB always work in days.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        # DATE_SUB(x, n) is modeled as DATE_ADD(x, -n).
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        # STR_TO_MAP delimiters default to "," between pairs and ":" within a pair.
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """Parse TRANSFORM(...), either as a plain function call or as a
        Hive query transform (TRANSFORM(...) USING 'script' AS ...)."""
        # Only treat TRANSFORM as special when followed by a parenthesized
        # argument list; otherwise back up and let normal parsing proceed.
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        exprs = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        writer = None
        if self._match_text_seq("RECORDWRITER"):
            writer = self._parse_string()

        # Without USING this is just the TRANSFORM(...) function.
        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(exprs)

        script = self._parse_string()

        self._match(TokenType.ALIAS)
        target_schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        reader = None
        if self._match_text_seq("RECORDREADER"):
            reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=exprs,
            command_script=script,
            schema=target_schema,
            row_format_before=row_format_before,
            record_writer=writer,
            row_format_after=row_format_after,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length)
        as casts to STRING in every context except schema definitions, e.g. in Spark
        v3.4.0 `select cast(1234 as varchar(2))` yields '1234' rather than '12'.
        Since that is inconsistent with dialects such as postgres, the length is
        dropped here so the cast transpiles correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        data_type = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if not data_type or schema:
            return data_type

        def _char_to_text(node: exp.Expression) -> exp.Expression:
            # Rewrite CHAR/VARCHAR (with or without length) to TEXT.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return data_type.transform(_char_to_text, copy=False)

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY / DISTRIBUTE BY list and a trailing
        (SORT BY-aware) ORDER clause for window-like constructs."""
        if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}):
            partition = self._parse_csv(self._parse_conjunction)
        else:
            partition = []

        order = super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY))
        return partition, order
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 
'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function 
Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Hive-flavored SQL from a sqlglot syntax tree.

    Overrides the base generator with Hive's capabilities (LIMIT instead of
    FETCH, no join/table/query hints, percent-based TABLESAMPLE sizes) and
    maps unsupported expressions onto their closest Hive equivalents.
    """

    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    # Hive type names for types it does not support natively.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Expression-class -> SQL renderer overrides; later keys win over the
    # base generator's entries.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        """Return the CREATE expression unchanged.

        Hive has no temporary storage provider (there are hive settings
        though), so no provider clause needs to be injected.
        """
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        """Render a parameter as ``${name}``, except as the key of SET."""
        name = self.sql(expression, "this")
        parent = expression.parent

        # Inside SET key = value, emit the bare key rather than ${key}.
        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            return name

        return f"${{{name}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        """Render a schema, dropping explicit ascending-order markers."""
        expression = expression.copy()

        # desc=False would render an explicit ASC keyword; clear it so no
        # ordering keyword is emitted at all.
        for ordered_node in expression.find_all(exp.Ordered):
            if ordered_node.args.get("desc") is False:
                ordered_node.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        """Render a named constraint, stripping any attached properties."""
        expression = expression.copy()

        # Properties hanging off the constraint are not rendered in Hive.
        for props in list(expression.find_all(exp.Properties)):
            props.pop()

        name = self.sql(expression, "this")
        body = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {name} {body}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ROW FORMAT SERDE, with optional trailing serde properties."""
        props = self.sql(expression, "serde_properties")
        suffix = f" {props}" if props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{suffix}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render ARRAY_AGG as COLLECT_LIST, unwrapping any ORDER BY node."""
        arg = expression.this
        if isinstance(arg, exp.Order):
            # COLLECT_LIST takes the bare expression, not an ordered one.
            arg = arg.this
        return self.func("COLLECT_LIST", arg)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render WITH-style properties using Hive's TBLPROPERTIES prefix."""
        prefix = self.seg("TBLPROPERTIES")
        return self.properties(properties, prefix=prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type with Hive-specific normalizations.

        Sizeless VARCHAR/NVARCHAR become STRING, temporal types lose their
        parameters, and parameterized FLOAT widens to DOUBLE past 32 bits.
        """
        is_sizeless_varchar = (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        )

        if is_sizeless_varchar:
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            # Rebuild without parameters (e.g. precision), which Hive rejects.
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            param = expression.find(exp.DataTypeParam)
            if param:
                width = int(param.name)
                expression = exp.DataType.build("float" if width <= 32 else "double")

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        """Render a version clause without the FOR keyword the base emits."""
        return super().version_sql(expression).replace("FOR ", "", 1)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 
'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: 
<function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function 
_time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 
'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def
temporary_storage_provider( self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
def parameter_sql(self, expression: exp.Parameter) -> str:
    """Render a parameter using Hive's ``${name}`` substitution syntax.

    Inside a ``SET key = value`` statement the key must be emitted bare
    (``SET key = value``), not wrapped as ``${key}``; everywhere else the
    parameter is wrapped in ``${...}``.
    """
    rendered = self.sql(expression, "this")
    parent = expression.parent
    # A parameter that is the operand of an EQ directly under a SetItem is
    # the key of a SET assignment and must not be wrapped.
    is_set_assignment_key = isinstance(parent, exp.EQ) and isinstance(
        parent.parent, exp.SetItem
    )
    return rendered if is_set_assignment_key else f"${{{rendered}}}"
def constraint_sql(self, expression: exp.Constraint) -> str:
    """Generate a ``CONSTRAINT`` clause, stripping any Properties nodes.

    Hive constraints do not support attached properties, so they are
    removed from a copy of the expression before rendering.
    """
    expression = expression.copy()

    props = list(expression.find_all(exp.Properties))
    for node in props:
        node.pop()

    name_sql = self.sql(expression, "this")
    body_sql = self.expressions(expression, sep=" ", flat=True)
    return f"CONSTRAINT {name_sql} {body_sql}"
def
rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, normalizing cases Hive treats specially.

    - Sizeless VARCHAR/NVARCHAR are rebuilt as "text" (Hive STRING).
    - Temporal types are rebuilt bare, dropping any parameters.
    - FLOAT(p) stays FLOAT when p <= 32, otherwise becomes DOUBLE.
    """
    dtype = expression.this

    if dtype in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) and not (
        expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif dtype in exp.DataType.TEMPORAL_TYPES:
        # Rebuild from the bare type so any size/precision params are dropped.
        expression = exp.DataType.build(dtype)
    elif expression.is_type("float"):
        param = expression.find(exp.DataTypeParam)
        if param:
            # Precision up to 32 bits fits FLOAT; anything wider is DOUBLE.
            target = "float" if int(param.name) <= 32 else "double"
            expression = exp.DataType.build(target)

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify == "always" or identify is True:
        return True

    # Only the "safe" option consults case sensitivity; any other value
    # (including False) means the text cannot be identified.
    return identify == "safe" and not cls.case_sensitive(text)
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or
True
: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql