sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
# Keyed by upper-cased unit name. The first element is the SQL function used by
# _add_date_sql; the second is the factor applied to the increment there and
# used as a divisor in _date_diff_sql.
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

# SQL suffix appended to a difference of two UNIX_TIMESTAMP calls in
# _date_diff_sql. The empty string means the raw difference is emitted as-is.
TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

# Units for which _date_diff_sql emits MONTHS_BETWEEN instead of DATEDIFF.
DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    """Render a DateAdd/DateSub node as an ADD_MONTHS or DATE_ADD call.

    The function name and a multiplier are looked up in DATE_DELTA_INTERVAL by
    the upper-cased unit; units missing from the table fall back to
    ("DATE_ADD", 1). For DateSub the multiplier is negated, so subtraction is
    expressed as adding a negative amount.

    Args:
        self: The generator producing the SQL.
        expression: The date addition or subtraction node.

    Returns:
        The SQL for the function call.
    """
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        # Numeric literal: fold the multiplication at generation time.
        # NOTE(review): int() on the literal text would raise for a non-integer
        # literal such as 1.5 -- confirm such input cannot reach this point.
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        # Anything else: keep the increment expression and, if needed, wrap a
        # copy of it in an explicit multiplication.
        modified_increment = expression.expression.copy()
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    """Render a DateDiff node for the node's unit.

    Units listed in TIME_DIFF_FACTOR are rendered as a difference of two
    UNIX_TIMESTAMP calls with the table's suffix appended. All other units use
    MONTHS_BETWEEN (YEAR/QUARTER/MONTH) or DATEDIFF, divided by the
    DATE_DELTA_INTERVAL multiplier when that multiplier is greater than 1.

    Args:
        self: The generator producing the SQL.
        expression: The date difference node.

    Returns:
        The SQL for the difference.
    """
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    # `is not None` matters: the SECOND entry is an empty (falsy) string.
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        # Parenthesize only when a scaling suffix follows the subtraction.
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    """Render a JSONFormat node as TO_JSON.

    When the argument is a cast of a string literal to JSON, the literal is
    round-tripped through FROM_JSON/TO_JSON inside a dummy array, and the
    array delimiters are stripped with REGEXP_EXTRACT. Otherwise a plain
    TO_JSON call is emitted with the node's optional "options" argument.

    Args:
        self: The generator producing the SQL.
        expression: The JSON formatting node.

    Returns:
        The SQL for the JSON formatting expression.
    """
    this = expression.this
    if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string:
        # Since FROM_JSON requires a nested type, we always wrap the json string with
        # an array to ensure that "naked" strings like "'a'" will be handled correctly
        wrapped_json = exp.Literal.string(f"[{this.this.name}]")

        from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json))
        to_json = self.func("TO_JSON", from_json)

        # This strips the [, ] delimiters of the dummy array printed by TO_JSON
        return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    """Render an ArraySort node as SORT_ARRAY(<this>).

    A comparator (the node's `expression`) is reported through
    `self.unsupported` and left out of the output.
    """
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    """Render a Property node as `'<name>'=<value>` with the name single-quoted."""
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    """Render a StrToUnix node as UNIX_TIMESTAMP(<this>, <format>).

    The format argument is whatever the `time_format("hive")` helper returns
    for this node.
    """
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    """Render a StrToDate node as CAST(... AS DATE).

    When the rendered format is neither Hive.TIME_FORMAT nor Hive.DATE_FORMAT,
    the operand is first passed through
    FROM_UNIXTIME(UNIX_TIMESTAMP(<this>, <format>)).
    """
    this = self.sql(expression, "this")
    # Shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    """Render a StrToTime node as CAST(... AS TIMESTAMP).

    Same shape as `_str_to_date_sql`; only the cast target type differs.
    """
    this = self.sql(expression, "this")
    # Shadows the imported `time_format` helper inside this function only.
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    """Render a TimeToStr node as DATE_FORMAT(<this>, <format>)."""
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render a TsOrDsToDate node as TO_DATE.

    The format argument is emitted only when one is present and it is neither
    Hive.TIME_FORMAT nor Hive.DATE_FORMAT.
    """
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations."""

    # Dialect-level switches read by the shared sqlglot machinery; their exact
    # effect is defined outside this file.
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Format tokens used by this dialect (keys) mapped to strftime-style
    # directives (values). Presumably the keys follow Java SimpleDateFormat
    # pattern letters -- confirm against the Hive documentation.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # The values carry their own single quotes: they are interpolated directly
    # into generated SQL and compared against rendered format strings by the
    # module-level helpers.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for Hive."""

        QUOTES = ["'", '"']  # both quote characters delimit strings
        IDENTIFIERS = ["`"]  # backtick-delimited identifiers
        STRING_ESCAPES = ["\\"]  # backslash escapes inside strings
        ENCODE = "utf-8"

        # Base keywords plus Hive additions. The ADD ..., MSCK REPAIR and
        # REFRESH entries are tokenized as generic COMMAND tokens.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Maps a marker string to a type name. Presumably these are
        # numeric-literal suffixes (e.g. `10L`) -- confirm in tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        """Parser overrides for Hive function names, properties and TRANSFORM."""

        # Base-parser switches; their effect is implemented in parser.Parser.
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function name -> builder that receives the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            # DATE_ADD is parsed with a fixed DAY unit.
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            # The first argument is wrapped in TimeStrToTime before formatting.
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as an addition of (count * -1) days.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            # Both operands are wrapped in TsOrDsToDate.
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            # SPLIT is parsed as a regex split.
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse a TRANSFORM construct.

            Returns:
                None when no opening parenthesis follows, an `exp.Transform`
                when no USING clause follows the arguments, otherwise an
                `exp.QueryTransform` carrying the script, schema, row formats
                and record reader/writer.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                # Not a call: step back one token and let another rule handle it.
                # NOTE(review): assumes the TRANSFORM token was already
                # consumed by the caller -- confirm in parser.Parser.
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                # NOTE(review): a row format / record writer parsed above is
                # not attached to the plain Transform result.
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias keyword is optional; the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, replace every CHAR/VARCHAR data
                # type node in the parsed tree with TEXT, in place.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        """SQL generator overrides for Hive."""

        # Base-generator switches; their effect is implemented in
        # generator.Generator.
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False

        # Type name overrides applied on top of the base mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(generator, expression) returning SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # Arguments are swapped: the node's `expression` is emitted first.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            # An InputOutputFormat child is rendered as SQL; otherwise the
            # upper-cased name is used.
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with a literal \Q in the emitted SQL,
            # presumably so the regex-based SPLIT treats it as a literal
            # string -- confirm.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # NOTE(review): the node's `unit` argument is not consulted here.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        # Placement overrides for CREATE properties.
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render `ROW FORMAT SERDE <this>`, followed by the serde properties if any."""
            serde_props = self.sql(expression, "serde_properties")
            # Add the separating space only when there is something to append.
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST.

            When the aggregated expression is wrapped in an `exp.Order`, only
            the ordered operand is emitted, so the ordering itself is dropped.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the properties with a TBLPROPERTIES prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type after normalizing it for this dialect.

            - VARCHAR/NVARCHAR without arguments becomes TEXT.
            - Temporal types are rebuilt from the bare type value.
            - FLOAT with a size becomes FLOAT when the size is at most 32 and
              DOUBLE otherwise.

            The result is then rendered by the base generator.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                # Presumably this discards any type arguments such as a
                # precision -- confirm against exp.DataType.build.
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
class Hive(Dialect):
    """Hive dialect: tokenizer, parser and generator customizations."""

    # Dialect-level switches read by the shared sqlglot machinery; their exact
    # effect is defined outside this listing.
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    # Format tokens used by this dialect (keys) mapped to strftime-style
    # directives (values). Presumably the keys follow Java SimpleDateFormat
    # pattern letters -- confirm against the Hive documentation.
    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    # The values carry their own single quotes: DATEINT_FORMAT is interpolated
    # directly into generated SQL by the Generator's TRANSFORMS below.
    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for Hive."""

        QUOTES = ["'", '"']  # both quote characters delimit strings
        IDENTIFIERS = ["`"]  # backtick-delimited identifiers
        STRING_ESCAPES = ["\\"]  # backslash escapes inside strings
        ENCODE = "utf-8"

        # Base keywords plus Hive additions. The ADD ..., MSCK REPAIR and
        # REFRESH entries are tokenized as generic COMMAND tokens.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Maps a marker string to a type name. Presumably these are
        # numeric-literal suffixes (e.g. `10L`) -- confirm in tokens.Tokenizer.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        """Parser overrides for Hive function names, properties and TRANSFORM."""

        # Base-parser switches; their effect is implemented in parser.Parser.
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        # Function name -> builder that receives the parsed argument list.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            # DATE_ADD is parsed with a fixed DAY unit.
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            # The first argument is wrapped in TimeStrToTime before formatting.
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is represented as an addition of (count * -1) days.
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            # Both operands are wrapped in TsOrDsToDate.
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            # SPLIT is parsed as a regex split.
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse a TRANSFORM construct.

            Returns:
                None when no opening parenthesis follows, an `exp.Transform`
                when no USING clause follows the arguments, otherwise an
                `exp.QueryTransform` carrying the script, schema, row formats
                and record reader/writer.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                # Not a call: step back one token and let another rule handle it.
                # NOTE(review): assumes the TRANSFORM token was already
                # consumed by the caller -- confirm in parser.Parser.
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                # NOTE(review): a row format / record writer parsed above is
                # not attached to the plain Transform result.
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # The alias keyword is optional; the match result is ignored.
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                # Outside schema definitions, replace every CHAR/VARCHAR data
                # type node in the parsed tree with TEXT, in place.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        """SQL generator overrides for Hive."""

        # Base-generator switches; their effect is implemented in
        # generator.Generator.
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False

        # Type name overrides applied on top of the base mapping.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        # Expression type -> callable(generator, expression) returning SQL.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            # Arguments are swapped: the node's `expression` is emitted first.
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            # An InputOutputFormat child is rendered as SQL; otherwise the
            # upper-cased name is used.
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            # The delimiter is prefixed with a literal \Q in the emitted SQL,
            # presumably so the regex-based SPLIT treats it as a literal
            # string -- confirm.
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            # NOTE(review): the node's `unit` argument is not consulted here.
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        # Placement overrides for CREATE properties.
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            """Render `ROW FORMAT SERDE <this>`, followed by the serde properties if any."""
            serde_props = self.sql(expression, "serde_properties")
            # Add the separating space only when there is something to append.
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            """Render ArrayAgg as COLLECT_LIST.

            When the aggregated expression is wrapped in an `exp.Order`, only
            the ordered operand is emitted, so the ordering itself is dropped.
            """
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            """Render the properties with a TBLPROPERTIES prefix."""
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type after normalizing it for this dialect.

            - VARCHAR/NVARCHAR without arguments becomes TEXT.
            - Temporal types are rebuilt from the bare type value.
            - FLOAT with a size becomes FLOAT when the size is at most 32 and
              DOUBLE otherwise.

            The result is then rendered by the base generator.
            """
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                # Presumably this discards any type arguments such as a
                # precision -- confirm against exp.DataType.build.
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeSize)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    # Hive-specific lexical settings layered on top of the base tokenizer.

    # Strings may be wrapped in single or double quotes; identifiers in backticks.
    QUOTES = ["'", '"']
    IDENTIFIERS = ["`"]
    # Backslash is the only string escape character declared for this dialect.
    STRING_ESCAPES = ["\\"]
    ENCODE = "utf-8"

    # Base keywords plus the Hive additions. The resource/maintenance phrases all
    # map to TokenType.COMMAND; the key order matches the explicit listing.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        **dict.fromkeys(
            (
                "ADD ARCHIVE",
                "ADD ARCHIVES",
                "ADD FILE",
                "ADD FILES",
                "ADD JAR",
                "ADD JARS",
                "MSCK REPAIR",
                "REFRESH",
            ),
            TokenType.COMMAND,
        ),
        "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
    }

    # Maps a short suffix to a type name.
    # NOTE(review): presumably these are numeric-literal postfixes (e.g. 10L, 1.5BD)
    # as described in the Hive language manual -- confirm against tokens.Tokenizer.
    NUMERIC_LITERALS = dict(
        L="BIGINT",
        S="SMALLINT",
        Y="TINYINT",
        D="DOUBLE",
        F="FLOAT",
        BD="DECIMAL",
    )
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 
'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': 
<TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 
'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': 
<TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 
'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
class Parser(parser.Parser):
    # Hive-specific parsing rules layered on top of the base parser.

    # NOTE(review): presumably makes a single-argument LOG parse as LN, and makes
    # CAST non-strict -- confirm both flags against parser.Parser.
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    # Hive function name -> builder. Each builder receives the list of parsed
    # arguments and returns the expression node representing the call.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        # DATE_ADD(date, n) is represented as a TsOrDsAdd with a DAY unit.
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.Literal.string("DAY"),
        ),
        # The first argument is wrapped in TimeStrToTime before formatting.
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [exp.TimeStrToTime(this=seq_get(args, 0)), seq_get(args, 1)]
        ),
        # DATE_SUB(date, n) is the same node as DATE_ADD with the amount times -1.
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(
                this=seq_get(args, 1),
                expression=exp.Literal.number(-1),
            ),
            unit=exp.Literal.string("DAY"),
        ),
        # Both operands are wrapped in TsOrDsToDate.
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            group=seq_get(args, 2),
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    # TRANSFORM gets a dedicated parser so the script form (TRANSFORM ... USING)
    # can be told apart from an ordinary function call.
    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    # WITH SERDEPROPERTIES (...) is parsed as a wrapped, comma-separated list of
    # properties and stored in a SerdeProperties node.
    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        """
        Parse what follows a TRANSFORM keyword.

        Returns None (after stepping the token index back by one) when the next
        token is not an opening parenthesis, an exp.Transform built from the
        parsed arguments when no USING clause follows, and an exp.QueryTransform
        carrying the script, schema, row formats and record reader/writer otherwise.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # NOTE(review): presumably un-consumes the TRANSFORM token itself so the
            # caller can fall back to another interpretation -- confirm with caller.
            self._retreat(self._index - 1)
            return None

        transform_args = self._parse_wrapped_csv(self._parse_lambda)
        input_row_format = self._parse_row_format(match_row=True)
        writer = self._parse_string() if self._match_text_seq("RECORDWRITER") else None

        # Without USING this is treated as a plain function call.
        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(transform_args)

        script = self._parse_string()

        # The alias keyword ahead of the output schema is optional; the match
        # result is deliberately ignored.
        self._match(TokenType.ALIAS)
        output_schema = self._parse_schema()

        output_row_format = self._parse_row_format(match_row=True)
        reader = self._parse_string() if self._match_text_seq("RECORDREADER") else None

        return self.expression(
            exp.QueryTransform,
            expressions=transform_args,
            command_script=script,
            schema=output_schema,
            row_format_before=input_row_format,
            record_writer=writer,
            row_format_after=output_row_format,
            record_reader=reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a type, replacing CHAR/VARCHAR with TEXT outside of schema definitions.

        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length)
        as casts to STRING everywhere except in schema definitions. Observed in
        Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

        The value is not truncated to '12', unlike in other dialects (e.g. postgres),
        so the length has to be dropped for the cast to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        parsed = super()._parse_types(check_func=check_func, schema=schema)

        # Nothing was parsed, or this is a schema definition: keep the type as is.
        if not parsed or schema:
            return parsed

        def _char_to_text(node):
            # Swap CHAR/VARCHAR data types for TEXT in place; leave other nodes alone.
            if isinstance(node, exp.DataType) and node.is_type("char", "varchar"):
                return node.replace(exp.DataType.build("text"))
            return node

        return parsed.transform(_char_to_text, copy=False)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 
'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 
'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function 
Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    # SQL generator for the Hive dialect. Everything below overrides the base
    # generator: feature flags first, then the type-name and per-expression
    # rendering tables, then the methods that need more than a one-line rule.

    # --- Feature flags and keyword choices -------------------------------
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False

    # --- Data types that are emitted under a different name ---------------
    # The base mapping is spread first so the entries below take precedence.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    # --- Expression class -> rendering callable ---------------------------
    # As above, base entries come first and are overridden by what follows.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArrayConcat: rename_func("CONCAT"),
        # CONCAT_WS receives the separator first, then the array.
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql,
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.VarMap: var_map_sql,
        exp.Create: create_with_partitions_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        # The delimiter is prefixed with \Q before being handed to SPLIT.
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
    }

    # --- Where each property is placed in a CREATE statement --------------
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        """Render ``ROW FORMAT SERDE <this>``, plus the serde properties if any.

        The rendered properties are appended after a single space; when they
        render to an empty string nothing is appended.
        """
        # Properties are rendered before the serde itself, as in the
        # original ordering of the two `self.sql` calls.
        props_sql = self.sql(expression, "serde_properties")
        clause = "ROW FORMAT SERDE " + self.sql(expression, "this")
        if props_sql:
            clause += " " + props_sql
        return clause

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        """Render an array aggregation as ``COLLECT_LIST(...)``.

        When the aggregated operand is wrapped in an ``exp.Order``, the
        wrapper is discarded and only its inner operand is passed through.
        """
        operand = expression.this
        if isinstance(operand, exp.Order):
            operand = operand.this
        return self.func("COLLECT_LIST", operand)

    def with_properties(self, properties: exp.Properties) -> str:
        """Render ``properties`` behind a ``TBLPROPERTIES`` segment prefix."""
        tblproperties_prefix = self.seg("TBLPROPERTIES")
        return self.properties(properties, prefix=tblproperties_prefix)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Normalize a data type, then defer to the base generator.

        Rewrites applied, first match wins:
          * VARCHAR / NVARCHAR with no parameters is rebuilt as ``text``.
          * Any type in ``exp.DataType.TEMPORAL_TYPES`` is rebuilt from its
            bare type.
          * A ``float`` carrying a size becomes ``float`` when the size is
            at most 32 and ``double`` otherwise; an unsized float is left
            untouched.
        """
        kind = expression.this
        is_bare_varchar = (
            kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        )

        if is_bare_varchar:
            expression = exp.DataType.build("text")
        elif kind in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(kind)
        elif expression.is_type("float"):
            declared_size = expression.find(exp.DataTypeSize)
            if declared_size:
                width = int(declared_size.name)
                target = "float" if width <= 32 else "double"
                expression = exp.DataType.build(target)

        return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalDayToSecondSpan'>: 'DAY TO SECOND', <class 'sqlglot.expressions.IntervalYearToMonthSpan'>: 'YEAR TO MONTH', <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: 
<function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 
'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 
'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    """Normalize a data type, then defer to the base generator.

    Rewrites applied, first match wins:
      * VARCHAR / NVARCHAR with no parameters is rebuilt as ``text``.
      * Any type in ``exp.DataType.TEMPORAL_TYPES`` is rebuilt from its
        bare type.
      * A ``float`` carrying a size becomes ``float`` when the size is at
        most 32 and ``double`` otherwise; an unsized float is left untouched.
    """
    kind = expression.this
    is_bare_varchar = (
        kind in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    )

    if is_bare_varchar:
        expression = exp.DataType.build("text")
    elif kind in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(kind)
    elif expression.is_type("float"):
        declared_size = expression.find(exp.DataTypeSize)
        if declared_size:
            width = int(declared_size.name)
            target = "float" if width <= 32 else "double"
            expression = exp.DataType.build(target)

    return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Decide whether `text` can be identified under the given option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: the result is always true.
            "safe": the result is true only when `cls.case_sensitive(text)`
            reports the text as not case-sensitive.
            Any other value: the result is false.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify == "safe":
        # Only the "safe" mode needs to inspect the text itself.
        return not cls.case_sensitive(text)

    return identify is True or identify == "always"
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: always returns true. "safe": returns true only if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql