sqlglot.dialects.redshift
from __future__ import annotations

import typing as t

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import (
    NormalizationStrategy,
    concat_to_dpipe_sql,
    concat_ws_to_dpipe_sql,
    date_delta_sql,
    generatedasidentitycolumnconstraint_sql,
    json_extract_segments,
    no_tablesample_sql,
    rename_func,
    map_date_part,
)
from sqlglot.dialects.postgres import Postgres
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import build_convert_timezone

if t.TYPE_CHECKING:
    from sqlglot._typing import E


def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        expr = expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )
        if expr_type is exp.TsOrDsAdd:
            expr.set("return_type", exp.DataType.build("TIMESTAMP"))

        return expr

    return _builder


class Redshift(Postgres):
    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0
    COPY_PARAMS_ARE_CSV = False
    HEX_LOWERCASE = True
    HAS_DISTINCT_ARRAY_CONSTRUCTORS = True

    TIME_FORMAT = "'YYYY-MM-DD HH:MI:SS'"
    TIME_MAPPING = {
        **Postgres.TIME_MAPPING,
        "MON": "%b",
        "HH": "%H",
    }

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "CONVERT_TIMEZONE": lambda args: build_convert_timezone(args, "UTC"),
            "DATEADD": _build_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _build_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _build_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
            ),
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, transaction=True),
        }

        SUPPORTS_IMPLICIT_UNNEST = True

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            to = self._parse_types()
            self._match(TokenType.COMMA)
            this = self._parse_bitwise()
            return self.expression(exp.TryCast, this=this, to=to, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            index = self._index - 1
            func = self._parse_function()

            if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct):
                return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0))
            self._retreat(index)
            return None

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "(+)": TokenType.JOIN_MARKER,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "MINUS": TokenType.EXCEPT,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
        }
        KEYWORDS.pop("VALUES")

        # Redshift allows # to appear as a table identifier prefix
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = False
        MULTI_ARG_DISTINCT = True
        COPY_PARAMS_ARE_WRAPPED = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True

        # Redshift doesn't have `WITH` as part of its with_properties, so we remove it
        WITH_PROPERTIES_PREFIX = " "

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
            exp.DataType.Type.ROWVERSION: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CONCAT"),
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("transaction") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this),
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.eliminate_semi_and_anti_joins,
                    transforms.unqualify_unnest,
                    transforms.unnest_generate_date_array_using_recursive_cte,
                ]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.StartsWith: lambda self,
            e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'",
            exp.StringToArray: rename_func("SPLIT_TO_ARRAY"),
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: lambda self,
            e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Postgres doesn't support JSON_PARSE, but Redshift does
        TRANSFORMS.pop(exp.ParseJSON)

        # Redshift uses the POW | POWER (expr1, expr2) syntax instead of expr1 ^ expr2 (postgres)
        TRANSFORMS.pop(exp.Pow)

        # Redshift supports these functions
        TRANSFORMS.pop(exp.AnyValue)
        TRANSFORMS.pop(exp.LastDay)
        TRANSFORMS.pop(exp.SHA2)

        RESERVED_KEYWORDS = {
            "aes128", "aes256", "all", "allowoverwrite", "analyse", "analyze", "and",
            "any", "array", "as", "asc", "authorization", "az64", "backup", "between",
            "binary", "blanksasnull", "both", "bytedict", "bzip2", "case", "cast",
            "check", "collate", "column", "constraint", "create", "credentials",
            "cross", "current_date", "current_time", "current_timestamp",
            "current_user", "current_user_id", "default", "deferrable", "deflate",
            "defrag", "delta", "delta32k", "desc", "disable", "distinct", "do", "else",
            "emptyasnull", "enable", "encode", "encrypt ", "encryption", "end",
            "except", "explicit", "false", "for", "foreign", "freeze", "from", "full",
            "globaldict256", "globaldict64k", "grant", "group", "gzip", "having",
            "identity", "ignore", "ilike", "in", "initially", "inner", "intersect",
            "interval", "into", "is", "isnull", "join", "leading", "left", "like",
            "limit", "localtime", "localtimestamp", "lun", "luns", "lzo", "lzop",
            "minus", "mostly16", "mostly32", "mostly8", "natural", "new", "not",
            "notnull", "null", "nulls", "off", "offline", "offset", "oid", "old", "on",
            "only", "open", "or", "order", "outer", "overlaps", "parallel",
            "partition", "percent", "permissions", "pivot", "placing", "primary",
            "raw", "readratio", "recover", "references", "rejectlog", "resort",
            "respect", "restore", "right", "select", "session_user", "similar",
            "snapshot", "some", "sysdate", "system", "table", "tag", "tdes", "text255",
            "text32k", "then", "timestamp", "to", "top", "trailing", "true",
            "truncatecolumns", "type", "union", "unique", "unnest", "unpivot", "user",
            "using", "verbose", "wallet", "when", "where", "with", "without",
        }

        def unnest_sql(self, expression: exp.Unnest) -> str:
            args = expression.expressions
            num_args = len(args)

            if num_args > 1:
                self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}")
                return ""

            arg = self.sql(seq_get(args, 0))
            alias = self.expressions(expression.args.get("alias"), key="columns", flat=True)
            return f"{arg} AS {alias}" if alias else arg

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a no-op
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more
            generally mean VARCHAR of max length, which is `VARCHAR(MAX)` in Redshift. Therefore,
            if we get a `TEXT` data type without precision we convert it to `VARCHAR(MAX)`, and if
            it does have precision we just convert `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" TABLE PROPERTIES ({exprs})" if exprs else ""
            location = self.sql(expression, "location")
            location = f" LOCATION {location}" if location else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" FILE FORMAT {file_format}" if file_format else ""

            return f"SET{exprs}{location}{file_format}"

        def array_sql(self, expression: exp.Array) -> str:
            if expression.args.get("bracket_notation"):
                return super().array_sql(expression)

            return rename_func("ARRAY")(self, expression)
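For orientation, here is a minimal sketch of driving this dialect through sqlglot's top-level API. The table and column names are made up for illustration, and the outputs in the comments are expected results, not guarantees.

import sqlglot

# GETDATE() parses to exp.CurrentTimestamp, which Postgres renders as
# CURRENT_TIMESTAMP (see FUNCTIONS and the exp.CurrentTimestamp transform).
print(sqlglot.transpile("SELECT GETDATE()", read="redshift", write="postgres")[0])
# expected: SELECT CURRENT_TIMESTAMP

# DATEADD/DATEDIFF go through _build_date_delta, which normalizes the date
# part via map_date_part and builds exp.TsOrDsAdd / exp.TsOrDsDiff nodes.
print(sqlglot.transpile("SELECT DATEDIFF(day, d1, d2) FROM t", read="redshift", write="redshift")[0])
# expected: SELECT DATEDIFF(DAY, d1, d2) FROM t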
class Redshift(Postgres):
Specifies the strategy according to which identifiers should be normalized.
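As a hedged sketch of what the case-insensitive strategy implies, an unquoted identifier should normalize to lowercase. `Dialect.get_or_raise`, `normalize_identifier`, and `exp.to_identifier` are public sqlglot helpers; the identifier name is made up.

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

redshift = Dialect.get_or_raise("redshift")
ident = exp.to_identifier("MyTable")  # unquoted, so subject to normalization
print(redshift.normalize_identifier(ident).name)  # expected: mytable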
Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3), since the former has type INT[] while the latter is a SUPER value.
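A brief sketch of the consequence for round-tripping (the outputs in comments are expected results): the generator's array_sql keeps the two constructors distinct rather than canonicalizing one into the other.

import sqlglot

print(sqlglot.transpile("SELECT ARRAY[1, 2, 3]", read="redshift", write="redshift")[0])
# expected: SELECT ARRAY[1, 2, 3]   (an INT[] array)
print(sqlglot.transpile("SELECT ARRAY(1, 2, 3)", read="redshift", write="redshift")[0])
# expected: SELECT ARRAY(1, 2, 3)   (a SUPER value)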
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- SAFE_DIVISION
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- SUPPORTS_FIXED_SIZE_ARRAYS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Redshift.Parser(Postgres.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
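For example, the APPROXIMATE entry in NO_PAREN_FUNCTION_PARSERS collapses Redshift's APPROXIMATE COUNT(DISTINCT ...) syntax into a single exp.ApproxDistinct node. A sketch (the column and table names are made up; outputs in comments are expected results):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT APPROXIMATE COUNT(DISTINCT user_id) FROM t", read="redshift")
print(ast.find(exp.ApproxDistinct))  # expected: the ApproxDistinct node wrapping user_id
print(ast.sql(dialect="redshift"))
# expected: SELECT APPROXIMATE COUNT(DISTINCT user_id) FROM t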
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Redshift.Tokenizer(Postgres.Tokenizer):
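The keyword remapping is easiest to see with MINUS, which the tokenizer emits as an EXCEPT token, so the generated SQL uses EXCEPT. A sketch (table and column names are made up; the comment shows the expected output):

import sqlglot

sql = "SELECT a FROM t1 MINUS SELECT a FROM t2"
print(sqlglot.transpile(sql, read="redshift", write="redshift")[0])
# expected: SELECT a FROM t1 EXCEPT SELECT a FROM t2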
class Redshift.Generator(Postgres.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
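These options are normally supplied through the top-level helpers rather than by instantiating the Generator directly. A sketch of pretty-printing (the formatting shown assumes the default pad and indent of 2):

import sqlglot

sql = "SELECT a, b FROM t WHERE a > 1"
print(sqlglot.transpile(sql, read="redshift", write="redshift", pretty=True)[0])
# expected (roughly):
# SELECT
#   a,
#   b
# FROM t
# WHERE
#   a > 1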
def unnest_sql(self, expression: exp.Unnest) -> str:
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:

Redshift doesn't support a JSON type, so casting to it is treated as a no-op.
def datatype_sql(self, expression: exp.DataType) -> str:
Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean VARCHAR of max length, which is `VARCHAR(MAX)` in Redshift. Therefore, if we get a `TEXT` data type without precision we convert it to `VARCHAR(MAX)`, and if it does have precision we just convert `TEXT` to `VARCHAR`.
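A sketch of the rule described above (the table and column names are made up; outputs in the comments are expected results):

import sqlglot

print(sqlglot.transpile("CREATE TABLE t (c TEXT)", write="redshift")[0])
# expected: CREATE TABLE t (c VARCHAR(MAX))
print(sqlglot.transpile("CREATE TABLE t (c TEXT(100))", write="redshift")[0])
# expected: CREATE TABLE t (c VARCHAR(100))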
def alterset_sql(self, expression: exp.AlterSet) -> str:
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_PARAMS_EQ_REQUIRED
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- COPY_HAS_INTO_KEYWORD
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- schemacommentproperty_sql
- commentcolumnconstraint_sql
- bracket_sql
- matchagainst_sql