Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pygments/lexers/data.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3 pygments.lexers.data
4 ~~~~~~~~~~~~~~~~~~~~
6 Lexers for data file format.
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10"""
12import re
14from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, LexerContext, \
15 include, bygroups, inherit
16from pygments.token import Text, Comment, Keyword, Name, String, Number, \
17 Punctuation, Literal, Error
19__all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']
class YamlLexerContext(LexerContext):
    """Lexer context that carries YAML indentation state between callbacks."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Stack of enclosing indentation levels; `indent` is the current
        # level (-1 before any indentation has been established) and
        # `next_indent` is the candidate level for the upcoming block.
        self.indent_stack = []
        self.indent = -1
        self.next_indent = 0
        # Explicit indentation of the block scalar being read, or None when
        # the indentation must be detected from the first content line.
        self.block_scalar_indent = None
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    # The helpers below are callback factories.  Each returned callback has
    # the (lexer, match, context) signature required by ExtendedRegexLexer
    # and mutates the YamlLexerContext indentation fields as a side effect.

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                # At the start of a line: unwind indent levels deeper than
                # the new one; any leftover spaces are over-indentation.
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                # Over-indented spaces are emitted as an Error subtoken.
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                # Explicit indicators (e.g. '-', '?', ':') widen the next
                # indentation by their own width.
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                # The indentation indicator digit is relative to the
                # current (non-negative) indentation level.
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                # Split the line into indentation and (whitespace-only)
                # scalar content at the explicit block-scalar indent.
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                # No explicit indent: detect it from the first content line,
                # or leave the scalar if the line is not indented enough.
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    # Dedent below the scalar's indent ends the scalar;
                    # pop both the content and header states.
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                # Dedent ends the plain scalar; pop two states to leave
                # both the new-line and the scalar state.
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Text, Keyword.Type, Text, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning a block line
            (r'[ ]*', save_indent(Text), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text),
            # key with colon
            (r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases,
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text),
            # line breaks
            (r'\n+', Text),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text),
            # line breaks
            (r'\n+', Text),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar is the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Text),
            (r'[ ]+$', Text),
            # line breaks are ignored
            (r'\n+', Text),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        # Ensure the context is a YamlLexerContext so the indentation
        # callbacks above find the fields they mutate.
        if context is None:
            context = YamlLexerContext(text, 0)
        return super().get_tokens_unprocessed(text, context)
class JsonLexer(Lexer):
    """
    For JSON data structures.

    Implemented as a hand-rolled character-level state machine rather than a
    RegexLexer; ``get_tokens_unprocessed`` yields ``(index, token, value)``
    triples.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}  # space, LF, CR, tab

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        # One boolean per lexical mode; at most one is true at a time.
        in_string = False
        in_escape = False
        in_unicode_escape = 0  # number of hex digits still expected after \u
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False

        # Index where the token currently being accumulated started.
        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        # Malformed \uXXXX escape: give up on the escape
                        # but stay inside the string.
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    # Closing quote: the string may still turn out to be an
                    # object key, so it goes on the queue, not straight out.
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Text, text[start:stop]))
                else:
                    yield start, Text, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            # A new token begins at this character.
            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    if _token is Text:
                        yield _start, _token, _text
                    elif _token is String.Double:
                        # The queued string preceded a colon: it is a key.
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, Error, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            # Unterminated string at EOF is an error.
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Text, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    name = 'JSONBareObject'

    # Deprecated alias of JsonLexer: registers no aliases, filename
    # patterns, or MIME types, so it is never auto-selected.
    mimetypes = []
    filenames = []
    aliases = []
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <https://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # JSON-LD keywords, stored with the surrounding double quotes so they can
    # be compared directly against the raw Name.Tag token text emitted by
    # JsonLexer (which includes the quotes).
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        """Re-type JSON object keys that are JSON-LD keywords.

        Delegates to :class:`JsonLexer` and rewrites every ``Name.Tag`` token
        whose text is a known ``"@keyword"`` to ``Name.Decorator``; all other
        tokens pass through unchanged.
        """
        # Zero-argument super() for consistency with the rest of this module;
        # the explicit two-argument form is a Python 2 leftover.
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value