Coverage for C:\leo.repo\leo-editor\leo\plugins\importers\python.py: 98%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#@+leo-ver=5-thin
2#@+node:ekr.20211209153303.1: * @file ../plugins/importers/python.py
3"""The new, tokenize based, @auto importer for Python."""
4import sys
5import tokenize
6import token
7from collections import defaultdict
8import leo.core.leoGlobals as g
9#@+others
10#@+node:ekr.20211209052710.1: ** do_import
def do_import(c, s, parent):
    """
    Import the python source `s` into the outline rooted at `parent`.

    The text is split into child nodes by split_root, the python directives
    are then prepended to the body of `parent`, and, when the
    'put-class-in-imported-headlines' setting is on, the headline of every
    node whose body contains a line starting with 'class ' gets a 'class '
    prefix.

    Return True on success, False when the running python is older than 3.7.
    """
    too_old = sys.version_info < (3, 7, 0)
    if too_old:
        g.es_print('The python importer requires python 3.7 or above')
        return False

    # Build the tree of nodes first; split_root sets parent.b.
    split_root(parent, s.splitlines(True))
    parent.b = f'@language python\n@tabwidth -4\n{parent.b}'

    if not c.config.getBool('put-class-in-imported-headlines'):
        return True

    # Only descendants are renamed: parent.h is left alone.
    for p in parent.subtree():
        if p.b.startswith('class ') or '\nclass ' in p.b:
            p.h = f'class {p.h}'
    return True
23#@+node:vitalije.20211201230203.1: ** split_root
SPLIT_THRESHOLD = 10
def split_root(root, lines):
    '''
    Parses given lines and separates all top level function
    definitions and class definitions in separate nodes which
    are all direct children of the root. All longer class
    nodes are further divided, each method in a separate node.

    This function puts comments and decorators in the same node
    above the definition.

    Parameters:
        root  - the node that receives the result. This function calls
                root.deleteAllChildren(), sets the `b` (body) and `h`
                (headline) attributes, and creates children with
                insertAsLastChild().
        lines - list of the source lines, each one keeping its line ending.

    Returns the list of all found definitions. Each definition is an
    8-tuple; its layout is described near the end of this function.

    Nodes (and their inner definitions) spanning fewer than SPLIT_THRESHOLD
    lines are not split any further.
    '''
    #@+others
    #@+node:vitalije.20211208183603.1: *3* is_intro_line
    def is_intro_line(n, col):
        """
        Intro line is either a comment line that starts at the same column as the
        def/class line or a decorator line.

        n is the 1-based line number, col is the column of the def/class line.
        """
        # first we filter list of all tokens in the line n. We don't want white space tokens
        # we are interested only in the tokens containing some text.
        xs = [x for x in lntokens[n] if x[0] not in (token.DEDENT, token.INDENT, token.NL)]

        if not xs:
            # all tokens in this line are white space, therefore we
            # have a blank line. We want to allow a blank line in the
            # block of comments, so we return True
            return True

        t = xs[0]  # this is the first non blank token in the line n
        if t[2][1] != col:
            # if it isn't at the same column as the definition, it can't be
            # considered as an `intro` line
            return False
        if t[0] == token.OP and t[1] == '@':
            # this line starts with `@`, which means it is the decorator
            return True
        if t[0] == token.COMMENT:
            # this line starts with the comment at the same column as the definition
            return True

        # in all other cases this isn't an `intro` line
        return False
    #@+node:vitalije.20211208084231.1: *3* get_intro
    def get_intro(row, col):
        """
        Returns the number of preceding lines that can be considered as an `intro`
        to this function/class/method definition.

        row is the 1-based line number of the def/class line, col its column.
        """
        last = row
        # Walk upwards from the line just above the definition.
        for i in range(row - 1, 0, -1):
            if is_intro_line(i, col):
                last = i
            else:
                break
        # we don't want `intro` to start with the bunch of blank lines
        # they better be added to the end of the preceding node.
        # NOTE(review): this loop never breaks, so a blank line anywhere inside
        # the intro moves the start of the intro past it - confirm that
        # this is intended before changing it.
        for i in range(last, row):
            if lines[i - 1].isspace():
                last = i + 1
        return row - last
    #@+node:vitalije.20211206182505.1: *3* mkreadline
    def mkreadline(lines):
        """Return a readline-like callable over lines; it returns '' when exhausted."""
        # tokenize uses readline for its input
        itlines = iter(lines)
        def nextline():
            try:
                return next(itlines)
            except StopIteration:
                return ''
        return nextline
    #@+node:vitalije.20211208092828.1: *3* itoks
    def itoks(i):
        """Generate (index, token) pairs for all raw tokens starting at index i."""
        yield from enumerate(rawtokens[i:], start=i)
    #@+node:vitalije.20211208092833.1: *3* search
    def search(i, k):
        """Generate (index, token) pairs for the tokens of type k at index >= i."""
        for j, t in itoks(i):
            if t[0] == k:
                yield j, t
    #@+node:vitalije.20211208092910.1: *3* getdefn
    def getdefn(start):
        """
        Return the definition tuple for the definition starting at the token
        with index `start`, or None if no definition starts there.

        The tuple is (col, h1, h2, start_b, kind, name, c_ind, end_b).
        """
        # pylint: disable=undefined-loop-variable
        tok = rawtokens[start]
        if tok[0] != token.NAME or tok[1] not in ('async', 'def', 'class'):
            return None

        # The following few values are easy to get
        if tok[1] == 'async':
            kind = rawtokens[start + 1][1]
            if kind != 'def':
                # `async with` and `async for` are statements, not definitions.
                return None
            name = rawtokens[start + 2][1]
        else:
            kind = tok[1]
            name = rawtokens[start + 1][1]
        if kind == 'def' and rawtokens[start - 1][1] == 'async':
            # This `def` was already handled together with its `async` token.
            return None
        a, col = tok[2]

        # now we are searching for the end of the definition line
        # this one logical line may be divided in several physical
        # lines. At the end of this logical line, there will be a
        # NEWLINE token
        for i, t in search(start + 1, token.NEWLINE):
            # The last of the `header lines`.
            # These lines should not be indented in the node body.
            # The body lines *will* be indented.
            end_h = t[2][0]
            # In case we have a oneliner, let's define end_b here
            end_b = end_h
            # indented body starts on the next line
            start_b = end_h + 1
            break

        # Look ahead to check if we have a oneline definition or not.
        # That is, see whether INDENT or NEWLINE will come first.
        oneliner = True
        indent_tok = None
        for (i1, indent_tok), (i2, _t1) in zip(
            search(i + 1, token.INDENT), search(i + 1, token.NEWLINE)
        ):
            # INDENT comes after the NEWLINE, means the definition is in a single line
            oneliner = i1 > i2
            break

        # Find the end of this definition
        if oneliner:
            # The following lines will not be indented
            # because the definition was in the same line.
            c_ind = col
            # The end of the body is the same as the start of the body
            end_b = start_b
        else:
            # We have some body lines. Presumably the next token is INDENT.
            i += 1
            # This is the indentation of the first function/method/class body line
            # NOTE(review): if the INDENT token string holds the full leading
            # white space of the line, adding col over-counts for nested
            # definitions - confirm before changing.
            c_ind = len(indent_tok[1]) + col
            # Now search to find the end of this function/method/body
            for i, t in itoks(i + 1):
                col2 = t[2][1]
                if col2 > col:
                    continue
                if t[0] in (token.DEDENT, token.COMMENT):
                    end_b = t[2][0]
                    break

        # Increase end_b to include all following blank lines
        for j in range(end_b, len(lines) + 1):
            if lines[j - 1].isspace():
                end_b = j + 1
            else:
                break

        # Compute the number of `intro` lines
        intro = get_intro(a, col)
        return col, a - intro, end_h, start_b, kind, name, c_ind, end_b
    #@+node:vitalije.20211208101750.1: *3* body
    def bodyLine(x, ind):
        """
        Return the line x with `ind` leading characters removed.

        A line with less leading white space than `ind` is returned with
        an escape prefix that records the missing amount.
        """
        if ind == 0 or x[:ind].isspace():
            return x[ind:] or '\n'
        n = len(x) - len(x.lstrip())
        return f'\\\\-{ind-n}.{x[n:]}'

    def body(a, b, ind):
        """Return the text of lines a..b-1 (1-based), each dedented by `ind`."""
        xlines = (bodyLine(x, ind) for x in lines[a - 1 : b and (b - 1)])
        return ''.join(xlines)
    #@+node:vitalije.20211208110301.1: *3* indent
    def indent(x, n):
        """Return x preceded by n spaces."""
        return x.rjust(len(x) + n)
    #@+node:vitalije.20211208104408.1: *3* mknode
    def mknode(p, start, start_b, end, l_ind, col, xdefs):
        """Set the body of p and recursively create children for inner definitions."""
        # start - first line of this node
        # start_b - first line of this node's function/class body
        #           (not used in the computation below)
        # end - first line after this node
        # l_ind - amount of white space to strip from left
        # col - column start of child nodes
        # xdefs - all definitions inside this node

        # first let's find all defs that start at the same column
        # as our indented function/method/class body
        tdefs = [x for x in xdefs if x[0] == col]

        if not tdefs or end - start < SPLIT_THRESHOLD:
            # if there are no inner definitions or the total number of
            # lines is less than threshold, all lines should be added
            # to this node and no further splitting is necessary
            p.b = body(start, end, l_ind)
            return

        # lets check the first inner definition
        col, h1, h2, start_b, kind, name, c_ind, end_b = tdefs[0]
        if h1 > start:
            # first inner definition starts later
            # so we have some content before at-others
            b1 = body(start, h1, l_ind)
        else:
            # inner definitions start at the beginning of our body
            # so at-others will be the first line in our body
            b1 = ''
        o = indent('@others\n', col - l_ind)

        # now for the part after at-others we need to check the
        # last of inner definitions
        if tdefs[-1][-1] < end:
            # there are some lines after at-others
            b2 = body(tdefs[-1][-1], end, l_ind)
        else:
            # there are no lines after at-others
            b2 = ''
        # finally we can set our body
        p.b = f'{b1}{o}{b2}'

        # now we can continue to add children for each of the inner definitions
        # last keeps track of the last used line
        last = h1
        for col, h1, h2, start_b, kind, name, c_ind, end_b in tdefs:
            if h1 > last:
                new_body = body(last, h1, col)  # #2500.
                # there are some declaration lines in between two inner definitions
                p1 = p.insertAsLastChild()
                p1.h = declaration_headline(new_body)  # #2500
                p1.b = new_body
                last = h1
            p1 = p.insertAsLastChild()
            p1.h = name

            # let's find all next level inner definitions
            # those are the definitions whose starting and end line are
            # between the start and the end of this node
            subdefs = [x for x in xdefs if x[1] > h1 and x[-1] <= end_b]
            if subdefs:
                # there are some next level inner definitions
                # so let's split this node
                mknode(p=p1
                     , start=h1
                     , start_b=start_b
                     , end=end_b
                     , l_ind=l_ind + col  # increase indentation for at-others
                     , col=c_ind
                     , xdefs=subdefs
                     )
            else:
                # there are no next level inner definitions
                # so we can just set the body and continue
                # to the next definition
                p1.b = body(h1, end_b, col)

            last = end_b
    #@+node:ekr.20220320055103.1: *3* declaration_headline
    def declaration_headline(body_string):  # #2500
        """
        Return an informative headline for body_string, a group of declarations.
        """
        for s1 in g.splitLines(body_string):
            s = s1.strip()
            if s.startswith('#') and len(s.replace('#', '').strip()) > 1:
                # A non-trivial comment: Return the comment w/o the leading '#'.
                return s[1:].strip()
            if s and not s.startswith('#'):
                # A non-trivial non-comment.
                return s
        return "...some declarations"  # Return legacy headline.
    #@-others
    # rawtokens is a list of all tokens found in input lines
    rawtokens = list(tokenize.generate_tokens(mkreadline(lines)))

    # lntokens - line tokens are tokens grouped by the line number
    #            from which they originate.
    lntokens = defaultdict(list)
    for t in rawtokens:
        row = t[2][0]
        lntokens[row].append(t)

    # we create list of all definitions in the token list
    #  both `def` and `class` definitions
    #  each definition is a tuple with the following values
    #
    #  0: col     - column where the definition starts
    #  1: h1      - line number of the first line of this node
    #               this line may be above the starting line
    #               (comment lines and decorators are in these lines)
    #  2: h2      - line number of the last line of the declaration
    #               it is the line number where the `:` (colon) is.
    #  3: start_b - line number of the first indented line of the
    #               function/class body.
    #  4: kind    - can be 'def' or 'class'
    #  5: name    - name of the function, class or method
    #  6: c_ind   - column of the indented body
    #  7: end_b   - line number of the first line after the definition
    #
    # function getdefn returns None if the token at this index isn't start
    # of a definition. Therefore, we filter the list.
    definitions = list(filter(None, map(getdefn, range(len(rawtokens) - 1))))

    # a preparation step
    root.deleteAllChildren()

    # function mknode, sets the body and adds children recursively using
    # precalculated definitions list.
    # parameters are:
    #     p       - current node
    #     start   - line number of the first line of this node
    #     start_b - line number of the first line of this node's body
    #     end     - line number of the first line after this node
    #     l_ind   - this is the accumulated indentation through at-others
    #               it is the number of spaces that should be stripped from
    #               the beginning of each line in this node
    #     col     - the column at which start all of the inner definitions
    #               like methods or inner functions and classes
    #     xdefs   - list of the definitions covering this node
    mknode(p=root
         , start=1
         , start_b=1
         , end=len(lines) + 1
         , l_ind=0
         , col=0
         , xdefs=definitions
         )
    return definitions
345#@-others
# Registration record of this importer: the entry point function and
# the file extensions that it handles.
importer_dict = dict(
    func=do_import,
    extensions=['.py', '.pyw', '.pyi'],  # mypy uses .pyi extension.
)
350#@@language python
351#@@tabwidth -4
352#@-leo