# -*- coding: utf-8 -*-
"""
    pygments.lexers.sql
    ~~~~~~~~~~~~~~~~~~~

    Lexers for various SQL dialects and related interactive sessions.

    Postgres-specific lexers:

    `PostgresLexer`
        A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
        lexer are:

        - keyword and data type lists parsed from the PG docs (run the
          `_postgres_builtins` module to update them);
        - content of $-strings parsed using a specific lexer, e.g. the content
          of a PL/Python function is parsed using the Python lexer;
        - PG-specific constructs parsed: E-strings, $-strings, U&-strings,
          different operators and punctuation.

    `PlPgsqlLexer`
        A lexer for the PL/pgSQL language. Adds a few specific constructs on
        top of the PG SQL lexer (such as <<label>>).

    `PostgresConsoleLexer`
        A lexer to highlight an interactive psql session:

        - identifies the prompt and does its best to detect the end of a
          command in multiline statements where not all the lines are
          prefixed by a prompt, telling them apart from the output;
        - highlights errors in the output and notification levels;
        - handles psql backslash commands.

    The ``tests/examplefiles`` directory contains a few test files with data
    to be parsed by these lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
    Keyword, Name, String, Number, Generic, Literal
from pygments.lexers import get_lexer_by_name, ClassNotFound

from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
    PSEUDO_TYPES, PLPGSQL_KEYWORDS
from pygments.lexers._mysql_builtins import \
    MYSQL_CONSTANTS, \
    MYSQL_DATATYPES, \
    MYSQL_FUNCTIONS, \
    MYSQL_KEYWORDS, \
    MYSQL_OPTIMIZER_HINTS

from pygments.lexers import _tsql_builtins


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

line_re = re.compile('.*?\n')

language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')

def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen by looking for a nearby LANGUAGE clause; if none
    is found and the string is inside a DO statement, plpgsql is assumed.
    """
    lx = None
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
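
# An illustrative note (not part of the upstream module): how the $-string
# regex in PostgresLexer below maps onto the seven groups consumed by
# `language_callback`, for a hypothetical dollar-quoted PL/Python body:
#
#   CREATE FUNCTION f() RETURNS int LANGUAGE plpython3u AS $py$return 42$py$;
#
#   group 1 = '$'   group 2 = 'py'   group 3 = '$'    (opening quote)
#   group 4 = 'return 42'                             (contents, re-lexed)
#   group 5 = '$'   group 6 = 'py'   group 7 = '$'    (closing quote)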


class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    This way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seems to suggest that regexp lexers are not really subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass
        else:
            # TODO: better logging
            # print >>sys.stderr, "language not found:", lang
            return None
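
# A worked example (not part of the upstream module) of the fallback chain
# `_get_lexer` builds: for LANGUAGE 'plpython3u' the candidate list is
#
#   ['plpython3u', 'python3u', 'plpython3', 'python3']
#
# stripping the 'pl' prefix and the untrusted-language 'u' suffix in turn
# until `get_lexer_by_name` recognizes an alias ('python3' here).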


class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }
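
# A minimal usage sketch (not part of the upstream module), assuming
# Pygments is installed; TerminalFormatter is just one formatter choice:
#
#   from pygments import highlight
#   from pygments.formatters import TerminalFormatter
#   code = "SELECT E'\\t', $$raw text$$ FROM t;"
#   print(highlight(code, PostgresLexer(), TerminalFormatter()))
#
# The E-string and the $$-quoted string are among the PG-specific
# constructs mentioned in the module docstring.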


class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax of the PL/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
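
# A small sketch (not part of the upstream module) of the PL/pgSQL-only
# constructs the prepended rules pick up:
#
#   <<outer>>                 -- <<label>>       -> Name.Label
#   DECLARE
#       r  mytable%rowtype;   -- %rowtype        -> Name.Builtin
#   BEGIN
#       r.id := 1;            -- := assignment   -> Operator
#   END;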


class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer, adding support for psql-specific commands.

    This is not a complete psql lexer yet, as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]


re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
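
# An illustrative note (not part of the upstream module): `re_prompt`
# accepts the typical psql prompt variants, e.g.
#
#   'postgres=# '   (ready for a new command, superuser)
#   'postgres-# '   (continuation line)
#   'postgres(# '   (inside an open parenthesis)
#   "postgres'# "   (inside an unterminated string)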


class lookahead:
    """Wrap an iterator and allow pushing back an item."""
    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = None

    def __iter__(self):
        return self

    def send(self, i):
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not None:
            ni = self._nextitem
            self._nextitem = None
            return ni
        return next(self.iter)
    next = __next__
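
# A tiny usage sketch (not part of the upstream module): `send` pushes one
# item back so the next `next()` call returns it again, which is how the
# console lexer below hands a prompt line back to the command parser:
#
#   it = lookahead(['a', 'b'])
#   next(it)        # -> 'a'
#   it.send('a')    # push 'a' back
#   next(it)        # -> 'a' again
#   next(it)        # -> 'b'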


class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while True:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of a psql command-line example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt cycle
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return
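
# A short usage sketch (not part of the upstream module), assuming
# Pygments is installed; the session text is hypothetical:
#
#   from pygments import highlight
#   from pygments.formatters import HtmlFormatter
#   session = (
#       "postgres=# SELECT 'hello'\n"
#       "postgres-# AS greeting;\n"
#       " greeting\n----------\n hello\n(1 row)\n"
#   )
#   print(highlight(session, PostgresConsoleLexer(), HtmlFormatter()))
#
# The prompts, the command and the output are emitted as distinct token
# streams, with the continuation line folded into the same SQL statement.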


class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE',
                'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDICATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD',
                'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEDES', 'PRECISION', 'PREFIX',
                'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNING',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in names for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        return 0.01
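
# An illustrative note (not part of the upstream module): the constant,
# very low rating makes generic SQL a weak fallback during lexer guessing,
# so any dialect lexer with a positive score should outrank it, e.g.:
#
#   from pygments.lexers import guess_lexer
#   guess_lexer('DECLARE @x INT;')  # TransactSqlLexer rates this 1.0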


class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension
    to SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    # Use re.UNICODE to allow non ASCII letters in names.
    flags = re.IGNORECASE | re.UNICODE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = \
                name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
            if tsql_variable_re.search(text) is not None:
                rating += 0.1
            if tsql_go_re.search(text) is not None:
                rating += 0.1
        return rating
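
# A worked example (not part of the upstream module): for a hypothetical
# snippet
#
#   DECLARE @x INT;
#   SET @x = 1;
#   GO
#
# `tsql_declare_re` matches 'DECLARE @', so `analyse_text` returns 1.0
# outright; without the declaration, the '@x' variable and the 'GO' batch
# separator would each contribute only 0.1 to the rating.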


class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB share a common
    code history that suggests significant overlap, compatibility between
    the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Text, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard token
        # types, but formatters will style them as "Name" by default and
        # can add additional styles based on the token name. This gives
        # users flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = \
            name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
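
# A worked example (not part of the upstream module) of the backtick
# heuristic in `analyse_text` above, on a hypothetical snippet:
#
#   SELECT `a`, `b` FROM `t`;   -- three `name` tokens, zero [name] tokens
#
# dialect_name_count = 3, and 3 >= 2 * 0, so the rating is 0.5: the text
# is judged far more likely MySQL than Transact-SQL.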


class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
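
# A brief usage sketch (not part of the upstream module); the transcript
# below is hypothetical:
#
#   from pygments import highlight
#   from pygments.formatters import TerminalFormatter
#   session = (
#       "sqlite> SELECT 1\n"
#       "   ...> + 1;\n"
#       "2\n"
#   )
#   print(highlight(session, SqliteConsoleLexer(), TerminalFormatter()))
#
# Lines starting with the 8-character 'sqlite> ' or '   ...> ' prompts are
# lexed as SQL; everything else is emitted as output or traceback.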


class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    `RQL <http://www.logilab.org/project/rql>`_

    .. versionadded:: 2.0
    """
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }