sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"
    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(this)

    if this.type.is_type("json"):
        return self.sql(this)
    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.time_format, Hive.date_format):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.time_format, Hive.date_format):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    time_format = self.format_time(expression)
    if time_format == Hive.time_format:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.time_format, Hive.date_format):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    alias_post_tablesample = True
    identifiers_can_start_with_digit = True

    time_mapping = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    date_format = "'yyyy-MM-dd'"
    dateint_format = "'yyyyMMdd'"
    time_format = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(
                    this=seq_get(args, 1),
                    expression=exp.Literal.number(-1),
                ),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            "distribute": lambda self: self._parse_sort(exp.Distribute, "DISTRIBUTE", "BY"),
            "sort": lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
            "cluster": lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        INDEX_ON = "ON TABLE"

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.dateint_format}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.dateint_format})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(
                properties,
                prefix=self.seg("TBLPROPERTIES"),
            )

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)

            return super().datatype_sql(expression)

        def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_having_modifiers(expression) + [
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
            ]
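A minimal usage sketch (not part of the module source): Dialect subclasses register themselves by name, so the parser and generator above are normally exercised through sqlglot's top-level helpers with read="hive". The query below is illustrative.

import sqlglot

# Hive's DATE_ADD is parsed into exp.TsOrDsAdd with a DAY unit, per
# Hive.Parser.FUNCTIONS above.
expression = sqlglot.parse_one("SELECT DATE_ADD('2023-01-01', 7)", read="hive")
print(repr(expression))

# Generating SQL back applies Hive.Generator.TRANSFORMS,
# e.g. exp.TsOrDsAdd -> DATE_ADD(...).
print(expression.sql(dialect="hive"))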
class Hive.Parser(sqlglot.parser.Parser)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
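A hedged sketch of the CHAR/VARCHAR handling documented in _parse_types above: outside schema definitions the parametrized type is replaced with TEXT, which Hive.Generator's TYPE_MAPPING renders as STRING, while a column definition keeps its length. The table and column names are illustrative.

import sqlglot

cast = sqlglot.parse_one("SELECT CAST(1234 AS VARCHAR(2))", read="hive")
print(cast.sql(dialect="hive"))  # the cast target should come back as STRING

ddl = sqlglot.parse_one("CREATE TABLE t (c VARCHAR(2))", read="hive")
print(ddl.sql(dialect="hive"))  # schema context, so VARCHAR(2) is preserved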
class Hive.Generator(sqlglot.generator.Generator)
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key is a Python time format and the value is the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- bit_start (str): specifies which starting character to use to delimit bit literals. Default: None.
- bit_end (str): specifies which ending character to use to delimit bit literals. Default: None.
- hex_start (str): specifies which starting character to use to delimit hex literals. Default: None.
- hex_end (str): specifies which ending character to use to delimit hex literals. Default: None.
- byte_start (str): specifies which starting character to use to delimit byte literals. Default: None.
- byte_end (str): specifies which ending character to use to delimit byte literals. Default: None.
- raw_start (str): specifies which starting character to use to delimit raw literals. Default: None.
- raw_end (str): specifies which ending character to use to delimit raw literals. Default: None.
- identify (bool | str): 'always': always quote identifiers; 'safe': quote identifiers only if they don't contain an uppercase character; True defaults to 'always'.
- normalize (bool): if set to True all identifiers will be lowercased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if True, unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names to "upper", "lower", or None. Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample. Default: False
- identifiers_can_start_with_digit (bool): if an unquoted identifier can start with a digit. Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): if the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
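Most of these generator options are forwarded through the top-level helpers, so a short sketch like the following (with an illustrative query) is usually enough to exercise Hive.Generator; pretty and identify are the documented options above, not Hive-specific flags.

import sqlglot

sql = "SELECT a, COLLECT_LIST(b) FROM t GROUP BY a"

# pretty=True formats the output; identify=True quotes identifiers using
# Hive's backtick identifier delimiters.
print(sqlglot.transpile(sql, read="hive", write="hive", pretty=True)[0])
print(sqlglot.transpile(sql, read="hive", write="hive", identify=True)[0])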