sqlglot.dialects.postgres
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    arrow_json_extract_scalar_sql,
    arrow_json_extract_sql,
    datestrtodate_sql,
    format_time_lambda,
    max_or_greatest,
    min_or_least,
    no_paren_current_date_sql,
    no_pivot_sql,
    no_tablesample_sql,
    no_trycast_sql,
    rename_func,
    str_position_sql,
    timestamptrunc_sql,
    trim_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import binary_range_parser
from sqlglot.tokens import TokenType

# SQL suffix appended to EXTRACT(epoch FROM end - start) to convert the
# difference in seconds into the requested unit (see _date_diff_sql).
DATE_DIFF_FACTOR = {
    "MICROSECOND": " * 1000000",
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
    "DAY": " / 86400",
}


def _date_add_sql(kind):
    """Build a generator transform rendering date arithmetic as `<this> <kind> INTERVAL ...`.

    Args:
        kind: the binary operator to emit between the operand and the interval
            (the dialect registers "+" for DateAdd and "-" for DateSub).

    Returns:
        A `(generator, expression) -> str` callable suitable for TRANSFORMS.
    """

    def func(self, expression):
        # Imported lazily inside the transform, as in the original implementation.
        from sqlglot.optimizer.simplify import simplify

        this = self.sql(expression, "this")
        unit = expression.args.get("unit")
        # Simplify first so that constant expressions can collapse into a literal.
        expression = simplify(expression.args["expression"])

        if not isinstance(expression, exp.Literal):
            self.unsupported("Cannot add non literal")

        # Work on a copy so the caller's tree is not mutated, and force the
        # amount to be rendered as a string inside the interval.
        expression = expression.copy()
        expression.args["is_string"] = True
        return f"{this} {kind} {self.sql(exp.Interval(this=expression, unit=unit))}"

    return func


def _date_diff_sql(self, expression):
    """Render a DateDiff expression as a BIGINT cast over EXTRACT/AGE arithmetic.

    Units listed in DATE_DIFF_FACTOR are computed from the epoch difference of
    the two operands; WEEK, MONTH, QUARTER and YEAR are derived from the parts
    of `AGE(end, start)`; any other unit casts the AGE result itself.

    Args:
        self: the generator producing the SQL.
        expression: the DateDiff node (`this` is the end, `expression` the start).

    Returns:
        The generated SQL string.
    """
    unit = expression.text("unit").upper()
    factor = DATE_DIFF_FACTOR.get(unit)

    # Render the operands through the generator (not str()) so that they are
    # produced with this dialect's transforms and the generator's options.
    end = f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)"
    start = f"CAST({self.sql(expression, 'expression')} AS TIMESTAMP)"

    if factor is not None:
        return f"CAST(EXTRACT(epoch FROM {end} - {start}){factor} AS BIGINT)"

    age = f"AGE({end}, {start})"

    if unit == "WEEK":
        # Counts 4 weeks per month and 48 per year, matching the constants below.
        unit = f"EXTRACT(year FROM {age}) * 48 + EXTRACT(month FROM {age}) * 4 + EXTRACT(day FROM {age}) / 7"
    elif unit == "MONTH":
        unit = f"EXTRACT(year FROM {age}) * 12 + EXTRACT(month FROM {age})"
    elif unit == "QUARTER":
        unit = f"EXTRACT(year FROM {age}) * 4 + EXTRACT(month FROM {age}) / 3"
    elif unit == "YEAR":
        unit = f"EXTRACT(year FROM {age})"
    else:
        unit = age

    return f"CAST({unit} AS BIGINT)"


def _substring_sql(self, expression):
    """Render Substring as `SUBSTRING(<this> FROM <start> FOR <length>)`.

    The FROM and FOR parts are emitted only when the corresponding argument
    renders to a non-empty string.
    """
    this = self.sql(expression, "this")
    start = self.sql(expression, "start")
    length = self.sql(expression, "length")

    from_part = f" FROM {start}" if start else ""
    for_part = f" FOR {length}" if length else ""

    return f"SUBSTRING({this}{from_part}{for_part})"


def _string_agg_sql(self, expression):
    """Render GroupConcat as `STRING_AGG(<value>, <separator>[ ORDER BY ...])`.

    The separator defaults to the string literal ','. When the aggregated
    argument is an Order node, its inner expression is moved into the argument
    list and the remaining ORDER BY is appended after the arguments.
    """
    expression = expression.copy()
    separator = expression.args.get("separator") or exp.Literal.string(",")

    order = ""
    this = expression.this
    if isinstance(this, exp.Order):
        if this.this:
            # Detach the ordered value so only the ORDER BY part is rendered below.
            this = this.this.pop()
        order = self.sql(expression.this)  # Order has a leading space

    return f"STRING_AGG({self.format_args(this, separator)}{order})"


def _datatype_sql(self, expression):
    """Render ARRAY data types as `<element type>[]`; defer everything else to the generator."""
    if expression.this == exp.DataType.Type.ARRAY:
        return f"{self.expressions(expression, flat=True)}[]"
    return self.datatype_sql(expression)


def _auto_increment_to_serial(expression):
    """Replace an auto-increment constraint on a column definition with a serial type.

    When the column carries an AutoIncrementColumnConstraint, a copy is
    returned with that constraint removed and INT / SMALLINT / BIGINT changed
    to SERIAL / SMALLSERIAL / BIGSERIAL. Other column types keep their type but
    still lose the constraint. Without the constraint the input is returned
    unchanged.
    """
    auto = expression.find(exp.AutoIncrementColumnConstraint)

    if auto:
        expression = expression.copy()
        # `auto` belongs to the original tree; list.remove matches the copied
        # constraint by equality.
        expression.args["constraints"].remove(auto.parent)
        kind = expression.args["kind"]

        if kind.this == exp.DataType.Type.INT:
            kind.replace(exp.DataType(this=exp.DataType.Type.SERIAL))
        elif kind.this == exp.DataType.Type.SMALLINT:
            kind.replace(exp.DataType(this=exp.DataType.Type.SMALLSERIAL))
        elif kind.this == exp.DataType.Type.BIGINT:
            kind.replace(exp.DataType(this=exp.DataType.Type.BIGSERIAL))

    return expression


def _serial_to_generated(expression):
    """Rewrite SERIAL-family column types as an integer type plus identity constraints.

    SERIAL / SMALLSERIAL / BIGSERIAL become INT / SMALLINT / BIGINT on a copy
    of the column definition, and a NOT NULL constraint and a
    GeneratedAsIdentityColumnConstraint(this=False) are prepended when not
    already present. Columns of any other type are returned unchanged.
    """
    kind = expression.args["kind"]

    if kind.this == exp.DataType.Type.SERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.INT)
    elif kind.this == exp.DataType.Type.SMALLSERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.SMALLINT)
    elif kind.this == exp.DataType.Type.BIGSERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.BIGINT)
    else:
        data_type = None

    if data_type:
        expression = expression.copy()
        expression.args["kind"].replace(data_type)
        constraints = expression.args["constraints"]
        # NOTE(review): this=False presumably selects GENERATED BY DEFAULT
        # rather than ALWAYS -- confirm against the generator.
        generated = exp.ColumnConstraint(kind=exp.GeneratedAsIdentityColumnConstraint(this=False))
        notnull = exp.ColumnConstraint(kind=exp.NotNullColumnConstraint())
        # Both are inserted at index 0, so `generated` ends up before `notnull`.
        if notnull not in constraints:
            constraints.insert(0, notnull)
        if generated not in constraints:
            constraints.insert(0, generated)

    return expression


def _generate_series(args):
    """Build a GenerateSeries node, normalizing a string or unit-less interval step."""
    # The goal is to convert step values like '1 day' or INTERVAL '1 day' into INTERVAL '1' day
    step = seq_get(args, 2)

    if step is None:
        # Postgres allows calls with just two arguments -- the "step" argument defaults to 1
        return exp.GenerateSeries.from_arg_list(args)

    if step.is_string:
        args[2] = exp.to_interval(step.this)
    elif isinstance(step, exp.Interval) and not step.args.get("unit"):
        args[2] = exp.to_interval(step.this.this)

    return exp.GenerateSeries.from_arg_list(args)


def _to_timestamp(args):
    """Parse TO_TIMESTAMP as UnixToTime (one argument) or StrToTime (otherwise)."""
    # TO_TIMESTAMP accepts either a single double argument or (text, text)
    if len(args) == 1:
        # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE
        return exp.UnixToTime.from_arg_list(args)
    # https://www.postgresql.org/docs/current/functions-formatting.html
    return format_time_lambda(exp.StrToTime, "postgres")(args)


class Postgres(Dialect):
    # Dialect-level settings: default null ordering, default time format and
    # the mapping from Postgres format tokens to strftime-style directives.
    null_ordering = "nulls_are_large"
    time_format = "'YYYY-MM-DD HH24:MI:SS'"
    time_mapping = {
        "AM": "%p",
        "PM": "%p",
        "D": "%u",  # 1-based day of week
        "DD": "%d",  # day of month
        "DDD": "%j",  # zero padded day of year
        "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
        "FMDDD": "%-j",  # day of year
        "FMHH12": "%-I",  # 9
        "FMHH24": "%-H",  # 9
        "FMMI": "%-M",  # Minute
        "FMMM": "%-m",  # 1
        "FMSS": "%-S",  # Second
        "HH12": "%I",  # 09
        "HH24": "%H",  # 09
        "MI": "%M",  # zero padded minute
        "MM": "%m",  # 01
        "OF": "%z",  # utc offset
        "SS": "%S",  # zero padded second
        "TMDay": "%A",  # TM is locale dependent
        "TMDy": "%a",
        "TMMon": "%b",  # Sep
        "TMMonth": "%B",  # September
        "TZ": "%Z",  # uppercase timezone name
        "US": "%f",  # zero padded microsecond
        "WW": "%U",  # 1-based week of year
        "YY": "%y",  # 15
        "YYYY": "%Y",  # 2015
    }

    class Tokenizer(tokens.Tokenizer):
        # `$$` is accepted as a quote delimiter alongside the single quote.
        QUOTES = ["'", "$$"]

        # (start, end) delimiter pairs for bit, hex and byte string literals.
        BIT_STRINGS = [("b'", "'"), ("B'", "'")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

        # Base keywords plus the operators, types and statements added here.
        # Several statement keywords are mapped to TokenType.COMMAND.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "~~": TokenType.LIKE,
            "~~*": TokenType.ILIKE,
            "~*": TokenType.IRLIKE,
            "~": TokenType.RLIKE,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BIGSERIAL": TokenType.BIGSERIAL,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "DECLARE": TokenType.COMMAND,
            "DO": TokenType.COMMAND,
            "HSTORE": TokenType.HSTORE,
            "JSONB": TokenType.JSONB,
            "REFRESH": TokenType.COMMAND,
            "REINDEX": TokenType.COMMAND,
            "RESET": TokenType.COMMAND,
            "RETURNING": TokenType.RETURNING,
            "REVOKE": TokenType.COMMAND,
            "SERIAL": TokenType.SERIAL,
            "SMALLSERIAL": TokenType.SMALLSERIAL,
            "TEMP": TokenType.TEMPORARY,
            "CSTRING": TokenType.PSEUDO_TYPE,
        }

        # `$` is tokenized as a parameter marker.
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Parser(parser.Parser):
        STRICT_CAST = False

        # Function-name overrides layered over the base parser's table.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # Argument 0 becomes `unit` and argument 1 becomes `this`.
            "DATE_TRUNC": lambda args: exp.TimestampTrunc(
                this=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "GENERATE_SERIES": _generate_series,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
            "TO_TIMESTAMP": _to_timestamp,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
        }

        # `#` is parsed as bitwise XOR.
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.HASH: exp.BitwiseXor,
        }

        # `^` is parsed as exponentiation (see _parse_exponent).
        EXPONENT = {
            TokenType.CARET: exp.Pow,
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.AT_GT: binary_range_parser(exp.ArrayContains),
            TokenType.LT_AT: binary_range_parser(exp.ArrayContained),
        }

        def _parse_factor(self) -> t.Optional[exp.Expression]:
            # Factor operands are parsed by the exponent level, which binds
            # EXPONENT operators tighter than the FACTOR operators.
            return self._parse_tokens(self._parse_exponent, self.FACTOR)

        def _parse_exponent(self) -> t.Optional[exp.Expression]:
            return self._parse_tokens(self._parse_unary, self.EXPONENT)

        def _parse_date_part(self) -> exp.Expression:
            # Parses `part, value` into an Extract node.
            part = self._parse_type()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()

            # A string-literal part is turned into a Var carrying the same name.
            if part and part.is_string:
                part = exp.Var(this=part.name)

            return self.expression(exp.Extract, this=part, expression=value)

    class Generator(generator.Generator):
        SINGLE_STRING_INTERVAL = True
        LOCKING_READS_SUPPORTED = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        PARAMETER_TOKEN = "$"

        # Type names emitted in place of the base generator's defaults.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.BINARY: "BYTEA",
            exp.DataType.Type.VARBINARY: "BYTEA",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
        }

        # Expression-type -> SQL renderer overrides for this dialect.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
            # The two rewrites are listed in this order: auto-increment ->
            # serial, then serial -> generated identity.
            exp.ColumnDef: transforms.preprocess(
                [
                    _auto_increment_to_serial,
                    _serial_to_generated,
                ],
            ),
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
            exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
            exp.JSONBContains: lambda self, e: self.binary(e, "?"),
            exp.Pow: lambda self, e: self.binary(e, "^"),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _date_add_sql("-"),
            exp.DateDiff: _date_diff_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
            exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
            exp.ArrayContained: lambda self, e: self.binary(e, "<@"),
            exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]),
            exp.Pivot: no_pivot_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "~"),
            exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
            exp.StrPosition: str_position_sql,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Substring: _substring_sql,
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TableSample: no_tablesample_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: trim_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
            exp.DataType: _datatype_sql,
            exp.GroupConcat: _string_agg_sql,
            # ARRAY(<subquery>) when the first element is a Select, ARRAY[...] otherwise.
            exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})"
            if isinstance(seq_get(e.expressions, 0), exp.Select)
            else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }
class Postgres(Dialect):
    # Dialect-level settings: default null ordering, default time format and
    # the mapping from Postgres format tokens to strftime-style directives.
    null_ordering = "nulls_are_large"
    time_format = "'YYYY-MM-DD HH24:MI:SS'"
    time_mapping = {
        "AM": "%p",
        "PM": "%p",
        "D": "%u",  # 1-based day of week
        "DD": "%d",  # day of month
        "DDD": "%j",  # zero padded day of year
        "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
        "FMDDD": "%-j",  # day of year
        "FMHH12": "%-I",  # 9
        "FMHH24": "%-H",  # 9
        "FMMI": "%-M",  # Minute
        "FMMM": "%-m",  # 1
        "FMSS": "%-S",  # Second
        "HH12": "%I",  # 09
        "HH24": "%H",  # 09
        "MI": "%M",  # zero padded minute
        "MM": "%m",  # 01
        "OF": "%z",  # utc offset
        "SS": "%S",  # zero padded second
        "TMDay": "%A",  # TM is locale dependent
        "TMDy": "%a",
        "TMMon": "%b",  # Sep
        "TMMonth": "%B",  # September
        "TZ": "%Z",  # uppercase timezone name
        "US": "%f",  # zero padded microsecond
        "WW": "%U",  # 1-based week of year
        "YY": "%y",  # 15
        "YYYY": "%Y",  # 2015
    }

    class Tokenizer(tokens.Tokenizer):
        # `$$` is accepted as a quote delimiter alongside the single quote.
        QUOTES = ["'", "$$"]

        # (start, end) delimiter pairs for bit, hex and byte string literals.
        BIT_STRINGS = [("b'", "'"), ("B'", "'")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

        # Base keywords plus the operators, types and statements added here.
        # Several statement keywords are mapped to TokenType.COMMAND.
        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "~~": TokenType.LIKE,
            "~~*": TokenType.ILIKE,
            "~*": TokenType.IRLIKE,
            "~": TokenType.RLIKE,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BIGSERIAL": TokenType.BIGSERIAL,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "DECLARE": TokenType.COMMAND,
            "DO": TokenType.COMMAND,
            "HSTORE": TokenType.HSTORE,
            "JSONB": TokenType.JSONB,
            "REFRESH": TokenType.COMMAND,
            "REINDEX": TokenType.COMMAND,
            "RESET": TokenType.COMMAND,
            "RETURNING": TokenType.RETURNING,
            "REVOKE": TokenType.COMMAND,
            "SERIAL": TokenType.SERIAL,
            "SMALLSERIAL": TokenType.SMALLSERIAL,
            "TEMP": TokenType.TEMPORARY,
            "CSTRING": TokenType.PSEUDO_TYPE,
        }

        # `$` is tokenized as a parameter marker.
        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Parser(parser.Parser):
        STRICT_CAST = False

        # Function-name overrides layered over the base parser's table.
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # Argument 0 becomes `unit` and argument 1 becomes `this`.
            "DATE_TRUNC": lambda args: exp.TimestampTrunc(
                this=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "GENERATE_SERIES": _generate_series,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
            "TO_TIMESTAMP": _to_timestamp,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
        }

        # `#` is parsed as bitwise XOR.
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.HASH: exp.BitwiseXor,
        }

        # `^` is parsed as exponentiation (see _parse_exponent).
        EXPONENT = {
            TokenType.CARET: exp.Pow,
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.AT_GT: binary_range_parser(exp.ArrayContains),
            TokenType.LT_AT: binary_range_parser(exp.ArrayContained),
        }

        def _parse_factor(self) -> t.Optional[exp.Expression]:
            # Factor operands are parsed by the exponent level, which binds
            # EXPONENT operators tighter than the FACTOR operators.
            return self._parse_tokens(self._parse_exponent, self.FACTOR)

        def _parse_exponent(self) -> t.Optional[exp.Expression]:
            return self._parse_tokens(self._parse_unary, self.EXPONENT)

        def _parse_date_part(self) -> exp.Expression:
            # Parses `part, value` into an Extract node.
            part = self._parse_type()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()

            # A string-literal part is turned into a Var carrying the same name.
            if part and part.is_string:
                part = exp.Var(this=part.name)

            return self.expression(exp.Extract, this=part, expression=value)

    class Generator(generator.Generator):
        SINGLE_STRING_INTERVAL = True
        LOCKING_READS_SUPPORTED = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        PARAMETER_TOKEN = "$"

        # Type names emitted in place of the base generator's defaults.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.BINARY: "BYTEA",
            exp.DataType.Type.VARBINARY: "BYTEA",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
        }

        # Expression-type -> SQL renderer overrides for this dialect.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
            # The two rewrites are listed in this order: auto-increment ->
            # serial, then serial -> generated identity.
            exp.ColumnDef: transforms.preprocess(
                [
                    _auto_increment_to_serial,
                    _serial_to_generated,
                ],
            ),
            exp.JSONExtract: arrow_json_extract_sql,
            exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
            exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
            exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
            exp.JSONBContains: lambda self, e: self.binary(e, "?"),
            exp.Pow: lambda self, e: self.binary(e, "^"),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _date_add_sql("-"),
            exp.DateDiff: _date_diff_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
            exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
            exp.ArrayContained: lambda self, e: self.binary(e, "<@"),
            exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]),
            exp.Pivot: no_pivot_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "~"),
            exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
            exp.StrPosition: str_position_sql,
            exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.Substring: _substring_sql,
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
            exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
            exp.TableSample: no_tablesample_sql,
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: trim_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
            exp.DataType: _datatype_sql,
            exp.GroupConcat: _string_agg_sql,
            # ARRAY(<subquery>) when the first element is a Select, ARRAY[...] otherwise.
            exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})"
            if isinstance(seq_get(e.expressions, 0), exp.Select)
            else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }
class Tokenizer(tokens.Tokenizer):
    # `$$` is accepted as a quote delimiter alongside the single quote.
    QUOTES = ["'", "$$"]

    # (start, end) delimiter pairs for bit, hex and byte string literals.
    BIT_STRINGS = [("b'", "'"), ("B'", "'")]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    BYTE_STRINGS = [("e'", "'"), ("E'", "'")]

    # Base keywords plus the operators, types and statements added here.
    # Several statement keywords are mapped to TokenType.COMMAND.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "~~": TokenType.LIKE,
        "~~*": TokenType.ILIKE,
        "~*": TokenType.IRLIKE,
        "~": TokenType.RLIKE,
        "@>": TokenType.AT_GT,
        "<@": TokenType.LT_AT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BIGSERIAL": TokenType.BIGSERIAL,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "DECLARE": TokenType.COMMAND,
        "DO": TokenType.COMMAND,
        "HSTORE": TokenType.HSTORE,
        "JSONB": TokenType.JSONB,
        "REFRESH": TokenType.COMMAND,
        "REINDEX": TokenType.COMMAND,
        "RESET": TokenType.COMMAND,
        "RETURNING": TokenType.RETURNING,
        "REVOKE": TokenType.COMMAND,
        "SERIAL": TokenType.SERIAL,
        "SMALLSERIAL": TokenType.SMALLSERIAL,
        "TEMP": TokenType.TEMPORARY,
        "CSTRING": TokenType.PSEUDO_TYPE,
    }

    # `$` is tokenized as a parameter marker.
    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}
Inherited Members
class Parser(parser.Parser):
    STRICT_CAST = False

    # Function-name overrides layered over the base parser's table.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        # Argument 0 becomes `unit` and argument 1 becomes `this`.
        "DATE_TRUNC": lambda args: exp.TimestampTrunc(
            this=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "GENERATE_SERIES": _generate_series,
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "TO_CHAR": format_time_lambda(exp.TimeToStr, "postgres"),
        "TO_TIMESTAMP": _to_timestamp,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
    }

    # `#` is parsed as bitwise XOR.
    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.HASH: exp.BitwiseXor,
    }

    # `^` is parsed as exponentiation (see _parse_exponent).
    EXPONENT = {
        TokenType.CARET: exp.Pow,
    }

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.AT_GT: binary_range_parser(exp.ArrayContains),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContained),
    }

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        # Factor operands are parsed by the exponent level, which binds
        # EXPONENT operators tighter than the FACTOR operators.
        return self._parse_tokens(self._parse_exponent, self.FACTOR)

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_date_part(self) -> exp.Expression:
        # Parses `part, value` into an Extract node.
        part = self._parse_type()
        self._match(TokenType.COMMA)
        value = self._parse_bitwise()

        # A string-literal part is turned into a Var carrying the same name.
        if part and part.is_string:
            part = exp.Var(this=part.name)

        return self.expression(exp.Extract, this=part, expression=value)
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.RAISE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Inherited Members
class Generator(generator.Generator):
    SINGLE_STRING_INTERVAL = True
    LOCKING_READS_SUPPORTED = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    PARAMETER_TOKEN = "$"

    # Type names emitted in place of the base generator's defaults.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TINYINT: "SMALLINT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
        exp.DataType.Type.BINARY: "BYTEA",
        exp.DataType.Type.VARBINARY: "BYTEA",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
    }

    # Expression-type -> SQL renderer overrides for this dialect.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
        # The two rewrites are listed in this order: auto-increment ->
        # serial, then serial -> generated identity.
        exp.ColumnDef: transforms.preprocess(
            [
                _auto_increment_to_serial,
                _serial_to_generated,
            ],
        ),
        exp.JSONExtract: arrow_json_extract_sql,
        exp.JSONExtractScalar: arrow_json_extract_scalar_sql,
        exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
        exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
        exp.JSONBContains: lambda self, e: self.binary(e, "?"),
        exp.Pow: lambda self, e: self.binary(e, "^"),
        exp.CurrentDate: no_paren_current_date_sql,
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_add_sql("+"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _date_add_sql("-"),
        exp.DateDiff: _date_diff_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ArrayOverlaps: lambda self, e: self.binary(e, "&&"),
        exp.ArrayContains: lambda self, e: self.binary(e, "@>"),
        exp.ArrayContained: lambda self, e: self.binary(e, "<@"),
        exp.Merge: transforms.preprocess([transforms.remove_target_from_merge]),
        exp.Pivot: no_pivot_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "~"),
        exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
        exp.StrPosition: str_position_sql,
        exp.StrToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.Substring: _substring_sql,
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: lambda self, e: f"CAST({self.sql(e, 'this')} AS TIMESTAMP)",
        exp.TimeToStr: lambda self, e: f"TO_CHAR({self.sql(e, 'this')}, {self.format_time(e)})",
        exp.TableSample: no_tablesample_sql,
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: trim_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToTime: lambda self, e: f"TO_TIMESTAMP({self.sql(e, 'this')})",
        exp.DataType: _datatype_sql,
        exp.GroupConcat: _string_agg_sql,
        # ARRAY(<subquery>) when the first element is a Select, ARRAY[...] otherwise.
        exp.Array: lambda self, e: f"{self.normalize_func('ARRAY')}({self.sql(e.expressions[0])})"
        if isinstance(seq_get(e.expressions, 0), exp.Select)
        else f"{self.normalize_func('ARRAY')}[{self.expressions(e, flat=True)}]",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }
Generator interprets the given syntax tree and produces a SQL string as an output.
Arguments:
- time_mapping (dict): the dictionary of custom time mappings in which the key represents a python time format and the value the target time format
- time_trie (trie): a trie of the time_mapping keys
- pretty (bool): if set to True the returned string will be formatted. Default: False.
- quote_start (str): specifies which starting character to use to delimit quotes. Default: '.
- quote_end (str): specifies which ending character to use to delimit quotes. Default: '.
- identifier_start (str): specifies which starting character to use to delimit identifiers. Default: ".
- identifier_end (str): specifies which ending character to use to delimit identifiers. Default: ".
- bit_start (str): specifies which starting character to use to delimit bit literals. Default: None.
- bit_end (str): specifies which ending character to use to delimit bit literals. Default: None.
- hex_start (str): specifies which starting character to use to delimit hex literals. Default: None.
- hex_end (str): specifies which ending character to use to delimit hex literals. Default: None.
- byte_start (str): specifies which starting character to use to delimit byte literals. Default: None.
- byte_end (str): specifies which ending character to use to delimit byte literals. Default: None.
- identify (bool | str): 'always': always quote, 'safe': quote identifiers if they don't contain uppercase characters, True defaults to always.
- normalize (bool): if set to True all identifiers will be lowercased
- string_escape (str): specifies a string escape character. Default: '.
- identifier_escape (str): specifies an identifier escape character. Default: ".
- pad (int): determines padding in a formatted string. Default: 2.
- indent (int): determines the size of indentation in a formatted string. Default: 4.
- unnest_column_only (bool): if true unnest table aliases are considered only as column aliases
- normalize_functions (str): normalize function names, "upper", "lower", or None Default: "upper"
- alias_post_tablesample (bool): if the table alias comes after tablesample Default: False
- unsupported_level (ErrorLevel): determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- null_ordering (str): Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
- max_unsupported (int): Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma (bool): whether the comma is leading or trailing in select statements. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- datatypesize_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- unique_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql