sqlglot.dialects.hive
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this
    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(this)

    if this.type.is_type("json"):
        return self.sql(this)
    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: generator.Generator, expression: exp.Property) -> str:
    return f"'{expression.name}'={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, _time_format(self, expression))


def _str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_format(
    self: generator.Generator, expression: exp.UnixToStr | exp.StrToUnix
) -> t.Optional[str]:
    time_format = self.format_time(expression)
    if time_format == Hive.TIME_FORMAT:
        return None
    return time_format


def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            "distribute": lambda self: self._parse_sort(exp.Distribute, "DISTRIBUTE", "BY"),
            "sort": lambda self: self._parse_sort(exp.Sort, "SORT", "BY"),
            "cluster": lambda self: self._parse_sort(exp.Cluster, "CLUSTER", "BY"),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(check_func=check_func, schema=schema)

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        INDEX_ON = "ON TABLE"

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.BIT: "BOOLEAN",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql,
            exp.ILike: no_ilike_sql,
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.VarMap: var_map_sql,
            exp.Create: create_with_partitions_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, _time_format(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.RowFormatSerdeProperty: lambda self, e: f"ROW FORMAT SERDE {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)

            return super().datatype_sql(expression)

        def after_having_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_having_modifiers(expression) + [
                self.sql(expression, "distribute"),
                self.sql(expression, "sort"),
                self.sql(expression, "cluster"),
            ]
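For orientation, the sketch below shows how this dialect is typically exercised through sqlglot's public entry points. sqlglot.transpile and sqlglot.parse_one are real sqlglot APIs; the queries and the duckdb target are illustrative choices, and the behavior described in the comments is a best-effort reading of the rules above rather than a verified transcript.

import sqlglot

# Hive's DATE_ADD only steps by days, so DATE_DELTA_INTERVAL above maps a
# WEEK interval to ("DATE_ADD", 7); a 2-week interval should come out as a
# 14-day DATE_ADD in the generated Hive SQL.
print(sqlglot.transpile("SELECT DATE_ADD(d, INTERVAL 2 WEEK)", read="mysql", write="hive"))

# Reading Hive: backtick identifiers and COLLECT_SET parse into the common
# AST (COLLECT_SET becomes exp.SetAgg per FUNCTIONS above) and can be
# re-emitted for another engine.
print(sqlglot.parse_one("SELECT COLLECT_SET(`col`) FROM `db`.`t`", read="hive").sql(dialect="duckdb"))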
class Hive.Parser(sqlglot.parser.Parser)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
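These constructor arguments are forwarded from sqlglot's top-level helpers, so they can be set without instantiating Hive.Parser directly. A minimal sketch, assuming the standard sqlglot entry points (the query is illustrative):

import sqlglot
from sqlglot.errors import ErrorLevel

# DISTRIBUTE BY / SORT BY are recognized via the QUERY_MODIFIER_PARSERS
# entries above; error_level=RAISE collects all parse errors and raises
# them together instead of failing on the first one.
ast = sqlglot.parse_one(
    "SELECT * FROM t DISTRIBUTE BY c SORT BY c",
    read="hive",
    error_level=ErrorLevel.RAISE,
)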
class Hive.Generator(sqlglot.generator.Generator)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
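Generation options travel the same way: anything the Generator constructor accepts can be passed through Expression.sql. A minimal sketch (the query is illustrative):

import sqlglot

ast = sqlglot.parse_one("SELECT a, COLLECT_SET(b) FROM t GROUP BY a", read="hive")

# pretty and identify are Generator options; with identify=True the Hive
# generator quotes identifiers with backticks (IDENTIFIERS = ["`"] above).
print(ast.sql(dialect="hive", pretty=True, identify=True))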