Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/computation/parsing.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""":func:`~pandas.eval` source string parsing functions
2"""
4from io import StringIO
5from keyword import iskeyword
6import token
7import tokenize
8from typing import Iterator, Tuple
10# A token value Python's tokenizer probably will never use.
11BACKTICK_QUOTED_STRING = 100
def create_valid_python_identifier(name: str) -> str:
    """
    Create a valid Python identifier from an arbitrary string.

    If ``name`` contains special characters, each one is replaced by a
    descriptive placeholder string and the result is prefixed so it cannot
    collide with a real column name.

    Raises
    ------
    SyntaxError
        If the resulting name is still not a valid Python identifier.
        This can happen if there is a hashtag in the name, as the tokenizer
        will then terminate and not find the backtick.
        It also happens for characters outside the range (U+0001..U+007F).
    """
    # Fast path: already a usable identifier (and not a reserved word).
    if name.isidentifier() and not iskeyword(name):
        return name

    # Map each special character to a readable replacement token.
    # tokenize.EXACT_TOKEN_TYPES supplies the operator characters and
    # token.tok_name supplies a readable description for each of them.
    # The ignore here is because of a bug in mypy that is resolved in 0.740
    replacements = {
        char: f"_{token.tok_name[tokval]}_"
        for char, tokval in tokenize.EXACT_TOKEN_TYPES.items()  # type: ignore
    }
    replacements.update(
        {
            " ": "_",
            "?": "_QUESTIONMARK_",
            "!": "_EXCLAMATIONMARK_",
            "$": "_DOLLARSIGN_",
            "€": "_EUROSIGN_",
            # Including quotes works, but there are exceptions.
            "'": "_SINGLEQUOTE_",
            '"': "_DOUBLEQUOTE_",
            # Currently not possible. Terminates parser and won't find backtick.
            # "#": "_HASH_",
        }
    )

    cleaned = "BACKTICK_QUOTED_STRING_" + "".join(
        replacements.get(char, char) for char in name
    )

    if not cleaned.isidentifier():
        raise SyntaxError(f"Could not convert '{cleaned}' to a valid Python identifier.")

    return cleaned
def clean_backtick_quoted_toks(tok: Tuple[int, str]) -> Tuple[int, str]:
    """
    Clean up a column name if surrounded by backticks.

    Backtick quoted strings are marked with the BACKTICK_QUOTED_STRING tokval.
    Such a token is run through :func:`create_valid_python_identifier` so the
    parser can resolve the name when the query is executed, and it is re-tagged
    with the NAME tokval.

    Parameters
    ----------
    tok : tuple of int, str
        ints correspond to the all caps constants in the tokenize module

    Returns
    -------
    tok : Tuple[int, str]
        Either the input token or the replacement values
    """
    toknum, tokval = tok
    if toknum != BACKTICK_QUOTED_STRING:
        # Anything that is not backtick-quoted passes through untouched.
        return toknum, tokval
    return tokenize.NAME, create_valid_python_identifier(tokval)
def clean_column_name(name: str) -> str:
    """
    Emulate the cleaning applied to a backtick quoted name.

    This shows what happens to an identifier when it is parsed as Python code
    inside a backtick quoted string and then cleaned (stripped of any special
    characters).

    Parameters
    ----------
    name : str
        Name to be cleaned.

    Returns
    -------
    name : str
        Returns the name after tokenizing and cleaning.

    Notes
    -----
    Some names cannot be converted to a valid Python identifier; in that case
    :func:`tokenize_string` raises a SyntaxError and the name is returned
    unmodified here.

    If such a name was used in the query string (which makes the query call
    impossible), an error will instead be raised by
    :func:`tokenize_backtick_quoted_string`, which is not caught and
    propagates to the user level.
    """
    try:
        # Wrap in backticks so the name goes through the same tokenization
        # path as it would inside a real query string.
        tokval = next(tokenize_string(f"`{name}`"))[1]
        return create_valid_python_identifier(tokval)
    except SyntaxError:
        return name
def tokenize_backtick_quoted_string(
    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
) -> Tuple[int, str]:
    """
    Creates a token from a backtick quoted string.

    Moves the token_generator forwards till right after the next backtick.

    Parameters
    ----------
    token_generator : Iterator[tokenize.TokenInfo]
        The generator that yields the tokens of the source string (Tuple[int, str]).
        The generator is at the first token after the backtick (`)

    source : str
        The Python source code string.

    string_start : int
        This is the start of backtick quoted string inside the source string.

    Returns
    -------
    tok: Tuple[int, str]
        The token that represents the backtick quoted string.
        The integer is equal to BACKTICK_QUOTED_STRING (100).

    Raises
    ------
    SyntaxError
        If no closing backtick is found before the tokens are exhausted.
    """
    for _, tokval, start, _, _ in token_generator:
        if tokval == "`":
            # Everything between the opening and closing backtick is the name.
            string_end = start[1]
            return BACKTICK_QUOTED_STRING, source[string_start:string_end]
    # Previously an unterminated backtick fell through to an UnboundLocalError
    # on string_end; raise a clear SyntaxError instead (callers catch it).
    raise SyntaxError(f"Failed to parse backticks in '{source}'.")
def tokenize_string(source: str) -> Iterator[Tuple[int, str]]:
    """
    Tokenize a Python source code string.

    Parameters
    ----------
    source : str
        The Python source code string.

    Returns
    -------
    tok_generator : Iterator[Tuple[int, str]]
        An iterator yielding all tokens with only toknum and tokval (Tuple[int, str]).

    Raises
    ------
    SyntaxError
        If a backtick quoted string in ``source`` cannot be parsed.
    """
    line_reader = StringIO(source).readline
    token_generator = tokenize.generate_tokens(line_reader)

    # Loop over all tokens till a backtick (`) is found.
    # Then, take all tokens till the next backtick to form a backtick quoted string
    for toknum, tokval, start, _, _ in token_generator:
        if tokval == "`":
            try:
                yield tokenize_backtick_quoted_string(
                    token_generator, source, string_start=start[1] + 1
                )
            except Exception as err:
                # Chain the underlying error (`from err`) so the original
                # traceback is preserved for debugging instead of being lost.
                raise SyntaxError(f"Failed to parse backticks in '{source}'.") from err
        else:
            yield toknum, tokval