sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
# Keyed by upper-cased interval unit; consumed by _add_date_sql and _date_diff_sql.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix appended to a difference of UNIX_TIMESTAMP values (i.e. seconds) to express
# it in the keyed unit. The empty string means "no scaling needed".
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render a DateAdd / DateSub node as an ADD_MONTHS or DATE_ADD call.

    The function name and a unit multiplier are looked up in DATE_DELTA_INTERVAL;
    units not listed there fall back to ("DATE_ADD", 1). For DateSub the multiplier
    is negated so that subtraction is expressed as adding a negative amount.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Numeric literal: fold the multiplication now and emit a single literal.
        # NOTE(review): int() will raise on a non-integer literal text — confirm callers
        # only pass integral amounts.
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Arbitrary expression: multiply in SQL, but only when the multiplier matters.
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    """Render a DateDiff node.

    Units present in TIME_DIFF_FACTOR are computed as a difference of UNIX_TIMESTAMP
    values scaled by the unit's factor. Any other unit uses MONTHS_BETWEEN (for units in
    DIFF_MONTH_SWITCH) or DATEDIFF, divided by the DATE_DELTA_INTERVAL multiplier when
    that multiplier is greater than 1.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # An empty factor (SECOND) needs neither parentheses nor scaling.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    # Only the multiplier is needed here; the function-name half of the tuple is ignored.
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    """Render a JSONFormat node as TO_JSON.

    A string literal cast to JSON takes a special path: it is wrapped in a dummy array,
    round-tripped through FROM_JSON / TO_JSON, and the array delimiters are stripped
    again with REGEXP_EXTRACT.
    """
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    """Render ArraySort as SORT_ARRAY(<this>).

    A comparator (``expression.expression``) is reported through ``self.unsupported``
    and is not included in the generated SQL.
    """
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    """Render a Property as ``'<name>'=<value>`` with the name in single quotes."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as UNIX_TIMESTAMP(<this>, <format>).

    The format argument is produced by the imported ``time_format("hive")`` helper.
    """
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    """Render StrToDate as a CAST to DATE.

    When the format is neither Hive.TIME_FORMAT nor Hive.DATE_FORMAT, the value is first
    re-parsed via FROM_UNIXTIME(UNIX_TIMESTAMP(value, format)).
    """
    this = self.sql(expression, "this")
    # NOTE: this local shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    """Render StrToTime as a CAST to TIMESTAMP.

    When the format is neither Hive.TIME_FORMAT nor Hive.DATE_FORMAT, the value is first
    re-parsed via FROM_UNIXTIME(UNIX_TIMESTAMP(value, format)).
    """
    this = self.sql(expression, "this")
    # NOTE: this local shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    """Render TimeToStr as DATE_FORMAT(<this>, <format>)."""
    this = self.sql(expression, "this")
    # NOTE: this local shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render TsOrDsToDate as TO_DATE.

    The format argument is emitted only when one is present and it differs from both
    Hive.TIME_FORMAT and Hive.DATE_FORMAT.
    """
    this = self.sql(expression, "this")
    # NOTE: this local shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    # Dialect definition: class-level settings plus the nested Tokenizer, Parser and
    # Generator classes that customize each stage.
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Format tokens used by this dialect (keys) mapped to strftime-style directives (values).
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format literals are stored *including* their single quotes: the module-level helpers
    # compare them against rendered format SQL and splice them into f-strings verbatim.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        # Quoting / escaping characters recognised by this dialect's tokenizer.
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keyword table extended with multi-word statements that are tokenized as
        # COMMAND, plus the WITH SERDEPROPERTIES clause.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Presumably numeric-literal suffix -> type name (e.g. a trailing "L" meaning
        # BIGINT); confirm against tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        PARTITION_BY_TOKENS = {*parser.Parser.PARTITION_BY_TOKENS, TokenType.DISTRIBUTE_BY}

        # Function name -> builder that turns the parsed argument list into an expression.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is modelled as adding the amount multiplied by -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            # Missing delimiters default to "," (pairs) and ":" (key/value).
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse what follows a TRANSFORM keyword.

            Returns:
                None if no opening parenthesis follows (the parser is moved back one
                token first); a plain Transform built from the arguments if no USING
                clause follows; otherwise a QueryTransform carrying the command script,
                the optional schema, row formats and RECORDWRITER / RECORDREADER strings.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias keyword before the schema is optional; the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, replace every CHAR / VARCHAR node with TEXT.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        # Overrides of the base type-name mapping for this dialect.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(generator, expression) producing the SQL text.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator first, hence the swapped argument order.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with \Q via CONCAT — presumably so the regex-based
            # SPLIT treats it literally; confirm against the target engine's regex rules.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # Only `this` and `expression` are rendered; the node's unit is not emitted.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>``, followed by the serde properties if any."""
            serde_props = self.sql(expression, "serde_properties")
            # Prepend the separating space only when there is something to append.
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST.

            When the aggregated argument is an Order node, only its inner expression is
            passed on, so the ordering does not appear in the output.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a TBLPROPERTIES prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a DataType before delegating to the base generator.

            - VARCHAR / NVARCHAR without nested expressions is rebuilt as "text".
            - Temporal types are rebuilt from the bare type alone.
            - A float carrying a DataTypeSize becomes "float" when the size is <= 32 and
              "double" otherwise.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                # Rebuilding from `this` alone presumably drops any type parameters — confirm.
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
class Hive(Dialect):
    # Dialect definition: class-level settings plus the nested Tokenizer, Parser and
    # Generator classes that customize each stage.
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Format tokens used by this dialect (keys) mapped to strftime-style directives (values).
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # Format literals are stored *including* their single quotes; DATEINT_FORMAT is
    # spliced verbatim into generated SQL by the TRANSFORMS lambdas below.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        # Quoting / escaping characters recognised by this dialect's tokenizer.
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        # Base keyword table extended with multi-word statements that are tokenized as
        # COMMAND, plus the WITH SERDEPROPERTIES clause.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Presumably numeric-literal suffix -> type name (e.g. a trailing "L" meaning
        # BIGINT); confirm against tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False
        PARTITION_BY_TOKENS = {*parser.Parser.PARTITION_BY_TOKENS, TokenType.DISTRIBUTE_BY}

        # Function name -> builder that turns the parsed argument list into an expression.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is modelled as adding the amount multiplied by -1.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            # Missing delimiters default to "," (pairs) and ":" (key/value).
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse what follows a TRANSFORM keyword.

            Returns:
                None if no opening parenthesis follows (the parser is moved back one
                token first); a plain Transform built from the arguments if no USING
                clause follows; otherwise a QueryTransform carrying the command script,
                the optional schema, row formats and RECORDWRITER / RECORDREADER strings.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias keyword before the schema is optional; the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, replace every CHAR / VARCHAR node with TEXT.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        # Overrides of the base type-name mapping for this dialect.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(generator, expression) producing the SQL text.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # CONCAT_WS takes the separator first, hence the swapped argument order.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with \Q via CONCAT — presumably so the regex-based
            # SPLIT treats it literally; confirm against the target engine's regex rules.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # Only `this` and `expression` are rendered; the node's unit is not emitted.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render ``ROW FORMAT SERDE <this>``, followed by the serde properties if any."""
            serde_props = self.sql(expression, "serde_properties")
            # Prepend the separating space only when there is something to append.
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST.

            When the aggregated argument is an Order node, only its inner expression is
            passed on, so the ordering does not appear in the output.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the given properties with a TBLPROPERTIES prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Normalize a DataType before delegating to the base generator.

            - VARCHAR / NVARCHAR without nested expressions is rebuilt as "text".
            - Temporal types are rebuilt from the bare type alone.
            - A float carrying a DataTypeSize becomes "float" when the size is <= 32 and
              "double" otherwise.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                # Rebuilding from `this` alone presumably drops any type parameters — confirm.
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    # Hive-specific lexical settings layered on top of the base tokenizer.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    # Base keywords, extended with the multi-word statements that are
    # tokenized as opaque commands, plus the SERDEPROPERTIES clause.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        **dict.fromkeys(
            (
                "ADD ARCHIVE",
                "ADD ARCHIVES",
                "ADD FILE",
                "ADD FILES",
                "ADD JAR",
                "ADD JARS",
                "MSCK REPAIR",
                "REFRESH",
            ),
            TokenType.COMMAND,
        ),
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # Numeric literal suffix -> name of the type the literal is associated with.
    NUMERIC_LITERALS = dict(
        L="BIGINT",
        S="SMALLINT",
        Y="TINYINT",
        D="DOUBLE",
        F="FLOAT",
        BD="DECIMAL",
    )
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 
'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': 
<TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 
'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': 
<TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 
'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
class Parser(parser.Parser):
    # Parser flags overridden for Hive (see the base parser for their meaning).
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False
    PARTITION_BY_TOKENS = set(parser.Parser.PARTITION_BY_TOKENS) | {TokenType.DISTRIBUTE_BY}

    # Hive function names mapped to callables that build the matching
    # expression from the parsed argument list. Entries here override or
    # extend the ones inherited from the base parser.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.Literal.string("DAY"),
        ),
        # The first argument is wrapped in TimeStrToTime before formatting.
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [exp.TimeStrToTime(this=seq_get(args, 0)), seq_get(args, 1)]
        ),
        # DATE_SUB is represented as adding the amount multiplied by -1.
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(
                this=seq_get(args, 1),
                expression=exp.Literal.number(-1),
            ),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
        ),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(
            this=exp.TsOrDsToDate.from_arg_list(args),
        ),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            group=seq_get(args, 2),
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        # Missing delimiters fall back to "," (pairs) and ":" (key/value).
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(
            this=exp.TsOrDsToDate.from_arg_list(args),
        ),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """
        Parse the remainder of a TRANSFORM construct.

        Returns:
            None (after moving the parser index back by one) when the next token is not an
            opening parenthesis; a plain `exp.Transform` built from the parenthesized arguments
            when no USING clause follows; otherwise an `exp.QueryTransform` holding the
            arguments, the command script, the optional schema, the row formats found before
            and after the script, and the optional RECORDWRITER / RECORDREADER strings.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        transform_args = self._parse_wrapped_csv(self._parse_lambda)
        input_row_format = self._parse_row_format(match_row=True)
        writer = self._parse_string() if self._match_text_seq("RECORDWRITER") else None

        # Without USING there is no script, so this is an ordinary TRANSFORM call.
        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(transform_args)

        script = self._parse_string()

        # The alias token in front of the output schema is optional.
        self._match(TokenType.ALIAS)
        output_schema = self._parse_schema()

        output_row_format = self._parse_row_format(match_row=True)
        reader = self._parse_string() if self._match_text_seq("RECORDREADER") else None

        return self.expression(
            exp.QueryTransform,
            expressions=transform_args,
            command_script=script,
            schema=output_schema,
            row_format_before=input_row_format,
            record_writer=writer,
            row_format_after=output_row_format,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, replacing CHAR / VARCHAR types with TEXT outside of schema definitions.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts
        to STRING in all contexts except for schema definitions. For example, this is the
        behavior in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        The value is not truncated to '12', which is inconsistent with what other dialects
        (e.g. postgres) do, so the length has to be dropped in order to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        parsed = super()._parse_types(check_func=check_func, schema=schema)

        # Nothing was parsed, or we are inside a schema definition: leave the result as is.
        if not parsed or schema:
            return parsed

        def _char_to_text(node):
            # Swap every CHAR / VARCHAR data type node for a plain TEXT type.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return parsed.transform(_char_to_text, copy=False)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
PARTITION_BY_TOKENS =
{<TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, <TokenType.PARTITION_BY: 'PARTITION_BY'>}
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': 
<bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 
'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 
'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function 
Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """SQL generator for the Hive dialect.

    Specializes the base generator through class-level flags, a type-name
    mapping, a per-expression transform table, and property placement
    overrides, plus a few rendering methods defined below.
    """

    # Generator feature flags overridden for this dialect.
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    # Type names emitted in place of the base generator's defaults.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # Expression class -> callable producing its SQL. Entries listed after the
    # unpacked base table take precedence over the inherited ones.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        # Rewrites wrapped with transforms.preprocess.
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS receives the separator first, then the array.
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with a literal \\Q inside CONCAT.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # Placement overrides for properties, layered on the base generator's table.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE <this>`` followed by the serde properties, if any."""
        props_sql = self.sql(expression, "serde_properties")
        # Only emit the separating space when there is something to append.
        suffix = f" {props_sql}" if props_sql else ""
        serde_sql = self.sql(expression, "this")
        return f"ROW FORMAT SERDE {serde_sql}{suffix}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an array aggregation as ``COLLECT_LIST(...)``.

        When the aggregated argument is an ``exp.Order`` node, only the node it
        wraps is passed on, so the ordering is not rendered.
        """
        argument = expression.this
        if isinstance(argument, exp.Order):
            argument = argument.this
        return self.func("COLLECT_LIST", argument)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render the given properties behind a ``TBLPROPERTIES`` segment prefix."""
        tblproperties_prefix = self.seg("TBLPROPERTIES")
        return self.properties(properties, prefix=tblproperties_prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type after normalizing it.

        * VARCHAR / NVARCHAR with no arguments is replaced by the ``text`` type.
        * Temporal types are rebuilt from their bare type value.
        * A ``float`` carrying an explicit size becomes ``float`` when the size
          is at most 32 and ``double`` otherwise.

        Any other type is handed to the base implementation unchanged.
        """
        type_kind = expression.this
        unsized_varchar_kinds = (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)

        if type_kind in unsized_varchar_kinds and not expression.expressions:
            expression = exp.DataType.build("text")
        elif type_kind in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(type_kind)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeSize)
            if size_expression:
                size = int(size_expression.name)
                expression = exp.DataType.build("float" if size <= 32 else "double")

        return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalDayToSecondSpan'>: 'DAY TO SECOND', <class 'sqlglot.expressions.IntervalYearToMonthSpan'>: 'YEAR TO MONTH', <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 
'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function 
right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, 
<class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 
'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type after normalizing it.

    * VARCHAR / NVARCHAR with no arguments is replaced by the ``text`` type.
    * Temporal types are rebuilt from their bare type value.
    * A ``float`` carrying an explicit size becomes ``float`` when the size
      is at most 32 and ``double`` otherwise.

    Any other type is handed to the base implementation unchanged.
    """
    type_kind = expression.this
    unsized_varchar_kinds = (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)

    if type_kind in unsized_varchar_kinds and not expression.expressions:
        expression = exp.DataType.build("text")
    elif type_kind in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(type_kind)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeSize)
        if size_expression:
            size = int(size_expression.name)
            expression = exp.DataType.build("float" if size <= 32 else "double")

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Decide whether `text` can be identified under the given `identify` option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True only if `cls.case_sensitive(text)` is false.
            Any other value: Always returns false.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    # Only the "safe" mode consults the dialect's case-sensitivity check.
    return identify == "safe" and not cls.case_sensitive(text)
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql