# -*- coding: utf-8 -*-
"""
    pygments.lexers.sql
    ~~~~~~~~~~~~~~~~~~~

    Lexers for various SQL dialects and related interactive sessions.

    Postgres-specific lexers:

    `PostgresLexer`
        A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
        lexer are:

        - keywords and data types lists parsed from the PG docs (run the
          `_postgres_builtins` module to update them);
        - content of $-strings parsed using a specific lexer, e.g. the content
          of a PL/Python function is parsed using the Python lexer;
        - parsing of PG-specific constructs: E-strings, $-strings, U&-strings,
          different operators and punctuation.

    `PlPgsqlLexer`
        A lexer for the PL/pgSQL language. Adds a few specific constructs on
        top of the PG SQL lexer (such as <<label>>).

    `PostgresConsoleLexer`
        A lexer to highlight an interactive psql session:

        - identifies the prompt and does its best to detect the end of command
          in multiline statements where not all the lines are prefixed by a
          prompt, telling them apart from the output;
        - highlights errors in the output and notification levels;
        - handles psql backslash commands.

    The ``tests/examplefiles`` directory contains a few test files with data
    to be parsed by these lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re

from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
    Keyword, Name, String, Number, Generic, Literal
from pygments.lexers import get_lexer_by_name, ClassNotFound
from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
    PSEUDO_TYPES, PLPGSQL_KEYWORDS
from pygments.lexers._mysql_builtins import \
    MYSQL_CONSTANTS, \
    MYSQL_DATATYPES, \
    MYSQL_FUNCTIONS, \
    MYSQL_KEYWORDS, \
    MYSQL_OPTIMIZER_HINTS

from pygments.lexers import _tsql_builtins


__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'SqlLexer', 'TransactSqlLexer', 'MySqlLexer',
           'SqliteConsoleLexer', 'RqlLexer']

line_re = re.compile('.*?\n')

language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')


def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen by looking for a nearby LANGUAGE clause; if none is
    found and the string appears inside a DO statement, plpgsql is assumed.
    """
    lx = None
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
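
# For example (illustrative), given
#
#     CREATE FUNCTION pymax(a integer, b integer) RETURNS integer AS $py$
#     return max(a, b)
#     $py$ LANGUAGE plpythonu;
#
# the callback finds "LANGUAGE plpythonu" right after the closing $py$ and
# highlights the function body with the Python lexer.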


class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in:
    this way the different lexers don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seems to suggest that regexp lexers are not really subclassable.
    """
    def get_tokens_unprocessed(self, text, *args):
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])
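        # e.g. lang == 'plperlu' gives tries == ['plperlu', 'perlu',
        # 'plperl', 'perl'], so the most specific name is attempted first.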

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass
        else:
            # TODO: better logging
            # print >>sys.stderr, "language not found:", lang
            return None


class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single),
             'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name),
             'quoted-ident'),
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),
            (r'"', String.Name, '#pop'),
        ],
    }


class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in the PL/pgSQL language.

    .. versionadded:: 1.5
    """
    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),    # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]


class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer, adding support for psql-specific commands.

    This is not a complete psql lexer yet, as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        (r'\n', Text, 'root'),
        (r'\s+', Text),
        (r'\\[^\s]+', Keyword.Pseudo),
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]


re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
re_end_command = re.compile(r';\s*(--.*?)?$')
re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
re_error = re.compile(r'(ERROR|FATAL):')
re_message = re.compile(
    r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
    r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')


class lookahead:
    """Wrap an iterator and allow pushing back an item."""
    def __init__(self, x):
        self.iter = iter(x)
        self._nextitem = None

    def __iter__(self):
        return self

    def send(self, i):
        self._nextitem = i
        return i

    def __next__(self):
        if self._nextitem is not None:
            ni = self._nextitem
            self._nextitem = None
            return ni
        return next(self.iter)
    next = __next__
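
# Illustrative use of lookahead (not part of the original module):
#
#     lines = lookahead(['one\n', 'two\n'])
#     first = next(lines)   # 'one\n'
#     lines.send(first)     # push it back...
#     next(lines)           # ...and get 'one\n' again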


class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    if mmsg.group(1).startswith("ERROR") \
                            or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                return
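
# An example of the kind of session this lexer highlights (illustrative):
#
#     template1=# SELECT 1 +
#     template1-#        2;
#      ?column?
#     ----------
#             3
#     (1 row)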


class SqlLexer(RegexLexer):
    """
    Lexer for Structured Query Language. Currently, this lexer does
    not recognize any special syntax except ANSI SQL.
    """

    name = 'SQL'
    aliases = ['sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-sql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words((
                'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
                'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
                'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
                'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
                'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
                'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
                'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
                'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
                'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
                'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
                'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
                'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
                'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
                'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
                'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
                'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
                'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
                'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
                'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
                'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
                'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
                'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
                'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
                'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
                'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
                'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
                'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
                'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
                'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
                'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
                'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
                'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
                'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
                'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
                'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
                'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE',
                'IMPLEMENTATION', 'IMPLICIT',
                'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDICATOR', 'INFIX',
                'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
                'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
                'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
                'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
                'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
                'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
                'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
                'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
                'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
                'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
                'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
                'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
                'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
                'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
                'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
                'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
                'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
                'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
                'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD',
                'PLACING',
                'PLI', 'POSITION', 'POSTFIX', 'PRECEDES', 'PRECISION', 'PREFIX',
                'PREORDER',
                'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
                'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
                'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
                'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
                'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
                'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
                'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
                'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
                'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
                'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
                'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
                'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
                'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNING',
                'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
                'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
                'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
                'SYSTEM_USER', 'TABLE', 'TABLE_NAME', 'TEMP', 'TEMPLATE', 'TEMPORARY',
                'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
                'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
                'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
                'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
                'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
                'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
                'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
                'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
                'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
                'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
                'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
                'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
                'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
             Keyword),
            (words((
                'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
                'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
                'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
                'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
             Name.Builtin),
            (r'[+*/<>=~!@#%^&|`?-]', Operator),
            (r'[0-9]+', Number.Integer),
            # TODO: Backslash escapes?
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),  # not a real string literal in ANSI SQL
            (r'[a-z_][\w$]*', Name),  # allow $s in identifiers for Oracle
            (r'[;:()\[\],.]', Punctuation)
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        return 0.01


class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension
    to SQL.

    The list of keywords includes ODBC keywords and keywords reserved for
    future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    # Use re.UNICODE to allow non-ASCII letters in names.
    flags = re.IGNORECASE | re.UNICODE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check whether there are any names using backticks
            # or brackets, as otherwise both counts are 0 and 0 >= 2 * 0
            # would make the bracket test below always succeed.
            dialect_name_count = name_between_backtick_count + \
                name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
        if tsql_variable_re.search(text) is not None:
            rating += 0.1
        if tsql_go_re.search(text) is not None:
            rating += 0.1
        return rating
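
# Illustrative ratings (not part of the original module): "DECLARE @x INT"
# sets the base rating to 1.0 (plus 0.1 for the @variable), while a script
# that merely brackets its identifiers, e.g. "SELECT [col] FROM [tab]",
# scores 0.5.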


class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Text, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard, but
        # formatters will style them as "Name" by default while adding
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }
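
    # Illustrative (not part of the original module): a Pygments style can
    # target the non-standard token directly, e.g.
    #
    #     from pygments.style import Style
    #     from pygments.token import Name
    #
    #     class BacktickStyle(Style):  # hypothetical example style
    #         styles = {Name.Quoted: 'italic #06b'}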

    def analyse_text(text):
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + \
            name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating


class SqliteConsoleLexer(Lexer):
    """
    Lexer for example sessions using sqlite3.

    .. versionadded:: 0.11
    """

    name = 'sqlite3con'
    aliases = ['sqlite3']
    filenames = ['*.sqlite3-console']
    mimetypes = ['text/x-sqlite3-console']

    def get_tokens_unprocessed(self, data):
        sql = SqlLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(data):
            line = match.group()
            if line.startswith('sqlite> ') or line.startswith('   ...> '):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:8])]))
                curcode += line[8:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             sql.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('SQL error: '):
                    yield (match.start(), Generic.Traceback, line)
                else:
                    yield (match.start(), Generic.Output, line)
        if curcode:
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))
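
# An example session for this lexer (illustrative):
#
#     sqlite> CREATE TABLE t (x INTEGER);
#     sqlite> SELECT y
#        ...> FROM t;
#     SQL error: no such column: y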


class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    `RQL <http://www.logilab.org/project/rql>`_

    .. versionadded:: 2.0
    """
    name = 'RQL'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Text),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            (r'[A-Z_]\w*\??', Name),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }
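

# ----------------------------------------------------------------------
# A minimal smoke test (illustrative; not part of the original module).
# Running this file directly dumps the tokens produced by SqlLexer and
# prints a highlighted version of a small query.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    _demo = "SELECT name, COUNT(*) FROM users GROUP BY name;"
    for _pos, _token, _value in SqlLexer().get_tokens_unprocessed(_demo):
        print(_pos, _token, repr(_value))
    print(highlight(_demo, SqlLexer(), TerminalFormatter()))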