1 """
2 Generic I/O utility objects.
3
4 Here is a list of the most essential classes in this module:
5
6 1. temporary file system objects: L{TempFile}, L{TempFolder}
7 2. special/decorated streams: L{MemoryStream}, L{AutoFlushStream}
8 3. reusable stream readers and writers: L{EntryReader}, L{EntryWriter}
9 4. convenient communication with the shell: L{Shell}
10
11 In addition, csb.io is also part of the CSB compatibility layer. In order to
12 ensure cross-interpreter compatibility, always use the following csb.io objects:
13
14 - L{MemoryStream} instead of (c)StringIO
15 - csb.io.Pickle instead of pickle or cPickle
16 - csb.io.urllib instead of urllib or urllib.request
17
18 See also L{csb.core} for additional notes on compatibility.
19 """
20
21 import os
22 import time
23 import errno
24 import shlex
25 import shutil
26 import tempfile
27 import subprocess
28
29 import csb.core
30
31
32 try:
33 from StringIO import StringIO
34 except ImportError:
35 from io import StringIO
36
37 try:
38 import cPickle as Pickle
39 except ImportError:
40 import pickle as Pickle
41
42 try:
43 import urllib.request as urllib
44 except ImportError:
45 import urllib
46
47
48 NEWLINE = "\n"
49
50
51 -class Shell(object):
52
53 POLL = 1.0
54
55 @staticmethod
def run(cmd, timeout=None):
    """
    Run a shell command and return the output.

    The command is executed directly, without an intermediate shell. A string
    command is first split into arguments with C{shlex.split}.

    @param cmd: shell command with its arguments
    @type cmd: tuple or str
    @param timeout: maximum duration in seconds
    @type timeout: float or None

    @rtype: L{ShellInfo}
    @raise InvalidCommandError: on invalid executable
    @raise TimeoutError: when the timeout is expired
    """

    if isinstance(cmd, csb.core.string):
        cmd = shlex.split(cmd)

    cmd = tuple(cmd)
    start = time.time()

    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as oe:
        # ENOENT: the executable does not exist
        if oe.errno == errno.ENOENT:
            raise InvalidCommandError(oe.strerror, cmd)
        raise

    if timeout is not None:
        deadline = start + timeout

        # poll() returns None while the process is still running and the exit
        # code (zero or not) once it has terminated. Testing for "== 0" here
        # would report every failing command as a timeout.
        #
        # NOTE(review): stdout/stderr are pipes which are not drained while
        # polling; a command producing more output than the OS pipe buffer
        # holds will block until the timeout expires.
        while process.poll() is None:
            remaining = deadline - time.time()

            if remaining <= 0:
                try:
                    process.kill()
                    process.wait()              # reap the killed child
                    process.stdout.close()
                    process.stderr.close()
                except OSError:
                    # best effort: the process may have exited in the meantime
                    pass
                raise TimeoutError(cmd, timeout)

            # never sleep past the deadline
            time.sleep(min(Shell.POLL, remaining))

    stdout, stderr = process.communicate()
    code = process.returncode

    return ShellInfo(code, stdout.decode() or '', stderr.decode() or '', cmd)
101
102 @staticmethod
104 """
105 Same as L{Shell.run()}, but raises L{ProcessError} on bad exit code.
106
107 @param cmd: shell command with its arguments
108 @type cmd: tuple or str
109 @param timeout: maximum duration in seconds
110 @type timeout: float or None
111
112 @rtype: L{ShellInfo}
113 @raise ProcessError: on bad exit code
114 @raise TimeoutError: when the timeout is expired
115 """
116 si = Shell.run(cmd, timeout=timeout)
117
118 if si.code == 0:
119 return si
120 else:
121 raise ProcessError(si)
122
124 """
125 Raised on L{Shell.run()} failures.
126 @type context: L{ShellInfo}
127 """
131
133 return 'Bad exit code: #{0.code}'.format(self.context)
134
136 """
137 Raised on L{Shell.run()} timeouts.
138 """
145
147 return 'The process "{0.context.cmd}" did not finish in {0.timeout}s'.format(self)
148
150 """
151 Raised when L{Shell.run()} encounters an OSError.
152 """
162
165
167 """
168 Shell command execution info
169 """
170
171 - def __init__(self, code, stdout, stderr, cmd):
172
173 self.code = code
174 self.stdout = stdout or ''
175 self.stderr = stderr or ''
176 self.cmd = ' '.join(cmd)
177
180 """
181 In-memory stream object. Can be used in a context manager.
182 """
185
187 try:
188 self.close()
189 except:
190 pass
191
193 """
194 Wrapper around a buffered stream which automatically calls flush()
195 after each write(). This is essentially a proxy/decorator.
196
197 @param stream: the stream object to wrap
198 @type stream: file
199 """
200
203
205 self._subject.write(data)
206 self._subject.flush()
207
209 """
210 Create a temporary file and take care of deleting it upon object
211 destruction. The file can be opened multiple times on any platform, unlike
212 the case with tempfile.NamedTemporaryFile (does not work on Windows).
213
214 >>> with TempFile() as tmp:
215 tmp.write(...)
216 open(tmp.name)...
217
218 @param dispose: automatically delete the file
219 @type dispose: bool
220 @param mode: file open mode (text, binary), default=t
221 @type mode: str
222 """
223
224 - def __init__(self, dispose=True, mode='t'):
225
226 fd, file = tempfile.mkstemp()
227
228 self.__file = file
229 self.__fd = fd
230 self.__fh = open(self.__file, 'w' + mode)
231 self.__mode = mode
232 self.__dispose = bool(dispose)
233
234 super(TempFile, self).__init__(self.__fh)
235
237
238 if self.__dispose:
239 try:
240 self.close()
241 except:
242 pass
243
246
249
251 """
252 Flush, close and delete the file.
253 """
254
255 if not self.__fh.closed:
256 self.__fh.flush()
257 self.__fh.close()
258 os.close(self.__fd)
259
260 if os.path.exists(self.__file):
261 os.remove(self.__file)
262
264 """
265 @return: the current content of the file.
266 @rtype: str or bytes
267 """
268 self.flush()
269 with open(self.name, 'r' + self.__mode) as f:
270 return f.read()
271
272 @property
274 """
275 Full path and file name
276 @rtype: str
277 """
278 return self.__file
279
281 """
282 Create a temporary directory which is automatically wiped when the object
283 is closed.
284
285 >>> with TempFolder() as tmp:
286 # put some files in tmp.name...
287
288 @param dispose: automatically delete the folder and its contents
289 @type dispose: bool
290 """
291
293
294 name = tempfile.mkdtemp()
295
296 self.__name = os.path.abspath(name)
297 self.__dispose = bool(dispose)
298
300
301 if self.__dispose:
302 try:
303 self.close()
304 except:
305 pass
306
309
312
314 """
315 Delete the entire directory and its contents.
316 """
317 if os.path.exists(self.name):
318 shutil.rmtree(self.name)
319
320 @property
322 """
323 Full directory name
324 @rtype: str
325 """
326 return self.__name
327
328 -class EntryReader(object):
329 """
330 Generic flatfile reader. Provides efficient iterable interface over the entries
331 in the specified stream.
332
333 @param stream: the source data stream to read
334 @type stream: file
335 @param start_marker: a string marker which marks the beginning of a new entry
336 @type start_marker: str
337 @param end_marker: a string marker which signals the end of the file
338 @type end_marker: str, None
339 """
def __init__(self, stream, start_marker, end_marker=None):
    """
    Bind the reader to an opened, seekable stream and rewind it.

    @param stream: the source data stream to read
    @type stream: file
    @param start_marker: a string marker which marks the beginning of a new entry
    @type start_marker: str
    @param end_marker: a string marker which signals the end of the file
    @type end_marker: str, None

    @raise TypeError: if C{stream} has no C{seek} or no C{readline}
    """
    supports_seek = hasattr(stream, 'seek')
    if not supports_seek or not hasattr(stream, 'readline'):
        raise TypeError('The entry reader requires an opened stream.')

    stream.seek(0)

    self._stream = stream
    self._start_marker = self._end_marker = None

    # go through the public attributes, so that any value conversion
    # attached to them is applied
    self.start_marker = start_marker
    self.end_marker = end_marker
352
353 @property
354 - def start_marker(self):
355 return self._start_marker
356 @start_marker.setter
357 - def start_marker(self, value):
358 if value is not None:
359 value = str(value)
360 self._start_marker = value
361
362 @property
363 - def end_marker(self):
364 return self._end_marker
365 @end_marker.setter
366 - def end_marker(self, value):
367 if value is not None:
368 value = str(value)
369 self._end_marker = value
370
372
373 try:
374 self._stream.close()
375 except:
376 pass
377
379 """
380 Return an iterator over all entries from the stream/flat file.
381
382 @return: iterable over all entries read from the stream
383 @rtype: generator
384 """
385
386 self._stream.seek(0)
387
388 entry = ''
389 in_entry = False
390
391 while True:
392 line = self._stream.readline()
393
394 if not in_entry:
395 if line.startswith(self.start_marker):
396 in_entry = True
397 entry = line
398 else:
399 if line.startswith(self.start_marker):
400 yield entry
401 entry = line
402 elif not line or line.strip() == self.end_marker:
403 yield entry
404 break
405 else:
406 entry += line
407
408 if not line:
409 break
410
412 """
413 Return a list of all entries in the stream.
414
415 @rtype: list
416 """
417 return list(self.entries())
418
419 -class EntryWriter(object):
420 """
421 A simple stream writer. The best way to use it is::
422
423 with EntryWriter(output_file, close=True) as out:
424 out.write(object)
425
426 In this way the stream is automatically closed at the end of the block.
427
428 @param destination: output file name or opened stream
429 @type destination: str or stream
430 @param newline: new line string (the default is L{csb.io.NEWLINE})
431 @type newline: str
432 @param close: if True (the default), the stream is automatically
433 closed when the object is destroyed
434 @type close: bool
435 """
436
def __init__(self, destination, close=True, newline=NEWLINE):
    """
    Attach the writer to a destination, opening it first if a file name
    is given.

    @param destination: output file name or opened stream
    @type destination: str or stream
    @param close: if True (the default), the stream is automatically
                  closed when the object is destroyed
    @type close: bool
    @param newline: new line string (the default is L{csb.io.NEWLINE})
    @type newline: str

    @raise TypeError: if C{destination} is neither a string nor an object
                      with a C{write} attribute
    """
    self._stream = None
    self._newline = NEWLINE
    self._autoclose = True

    self.newline = newline
    self.autoclose = close

    is_filename = isinstance(destination, csb.core.string)

    if not is_filename and not hasattr(destination, 'write'):
        raise TypeError(destination)

    if is_filename:
        # the writer owns a stream it has opened itself, so it always closes it
        self._stream = open(destination, 'w')
        self.autoclose = True
    else:
        self._stream = destination
455
456 - def __enter__(self):
458
459 - def __exit__(self, exc_type, exc_value, traceback):
460 if self.autoclose:
461 self.close()
462
464 if self.autoclose:
465 self.close()
466
467 @property
469 """
470 Destination stream
471 @rtype: stream
472 """
473 return self._stream
474
475 @property
478 @newline.setter
479 - def newline(self, value):
480 self._newline = str(value)
481
482 @property
483 - def autoclose(self):
484 return self._autoclose
485 @autoclose.setter
486 - def autoclose(self, value):
487 self._autoclose = bool(value)
488
490 """
491 Close the destination stream.
492 """
493 try:
494 self._stream.close()
495 except:
496 pass
497
def write(self, data):
    """
    Send a chunk of string data to the destination stream as is
    (nothing is appended).

    @param data: the data to write
    @type data: str
    """
    destination = self._stream
    destination.write(data)
503
def writeline(self, data):
    """
    Write C{data} to the destination stream, followed by the writer's
    C{newline} string.

    @param data: the data to write
    @type data: str
    """
    emit = self._stream.write
    emit(data)
    emit(self.newline)
510
def writeall(self, entries, delimiter=NEWLINE):
    """
    Write every item of C{entries} to the destination stream, each one
    followed by C{delimiter}.

    @param entries: a collection of objects
    @type entries: iterable
    @param delimiter: append this string after each entry; a value equal to
                      L{csb.io.NEWLINE} (the default) is replaced by
                      C{self.newline}
    @type delimiter: str
    """
    separator = self.newline if delimiter == NEWLINE else delimiter

    for item in entries:
        self.write(item)
        self.write(separator)
527
def dump(this, filename, gzip=False, lock=None, timeout=None):
    """
    Write a python object to a file, using pickle (see C{csb.io.Pickle}).
    Supports also '~' or '~user' in C{filename}.

    @param this: the object, which will be written to disk
    @type this: any picklable python object
    @param filename: filename of the new file
    @type filename: str
    @param gzip: use gzip to compress the file
    @type gzip: bool
    @param lock: use a lock directory (C{filename + '.lock'}) to restrict
                 access; any value other than None enables locking
    @param timeout: maximum number of seconds to wait for the lock. None
                    waits indefinitely; zero or less makes a single attempt
    @type timeout: float or None

    @raise IOError: if the lock cannot be acquired or released
    """
    filename = os.path.expanduser(filename)
    lockdir = None

    if lock is not None:
        lockdir = filename + '.lock'

        deadline = None
        if timeout is not None:
            # a non-positive timeout means: try once and give up
            deadline = time.time() + max(timeout, 0)

        # os.mkdir is atomic: it fails with EEXIST while the lock is held
        while True:
            try:
                os.mkdir(lockdir)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise IOError("Failed to acquire Lock")
                if deadline is not None and time.time() >= deadline:
                    raise IOError("Failed to acquire Lock")
            # do not spin at full CPU speed while waiting for the lock
            time.sleep(0.01)

    try:
        if gzip:
            # imported under an alias: "gzip" is also a parameter name here
            import gzip as gzip_module
            stream = gzip_module.GzipFile(filename, 'wb')
        else:
            stream = open(filename, 'wb')

        try:
            pickler = None

            if type(this).__name__ == 'array':
                # Legacy Numeric arrays need Numeric's own pickler. Other types
                # named "array" (e.g. array.array), or a missing Numeric
                # package, fall through to the regular pickle.
                try:
                    import Numeric
                except ImportError:
                    pass
                else:
                    pickler = Numeric.Pickler(stream)

            if pickler is not None:
                pickler.dump(this)
            else:
                Pickle.dump(this, stream, 1)
        finally:
            stream.close()
    finally:
        # release the lock even if writing has failed
        if lockdir is not None:
            try:
                os.rmdir(lockdir)
            except OSError:
                raise IOError('missing lockfile {0}'.format(lockdir))
587
def _open_pickle_file(filename, compressed):
    """
    Open C{filename} for binary reading, through gzip if C{compressed} is set.
    """
    if compressed:
        import gzip as gzip_module
        return gzip_module.GzipFile(filename)
    return open(filename, 'rb')


def _import_numeric():
    """
    Return the legacy Numeric module, or None if it is not installed.
    """
    try:
        import Numeric
    except ImportError:
        return None
    return Numeric


def load(filename, gzip=False, lock=None, timeout=None):
    """
    Unpickle an object from filename. Supports also '~' or '~user'.

    If regular unpickling fails and the legacy Numeric package is installed,
    the file is read again with Numeric's unpickler; otherwise the original
    unpickling error is raised.

    @param filename: filename of the pickled object
    @type filename: str
    @param gzip: use gzip to uncompress the file
    @type gzip: bool
    @param lock: use a lock directory (C{filename + '.lock'}) to restrict
                 access; any value other than None enables locking
    @param timeout: maximum number of seconds to wait for the lock. None
                    waits indefinitely; zero or less makes a single attempt
    @type timeout: float or None

    @return: python object unpickled from file
    @raise IOError: if the lock cannot be acquired or released
    """
    filename = os.path.expanduser(filename)
    lockdir = None

    if lock is not None:
        lockdir = filename + '.lock'

        deadline = None
        if timeout is not None:
            # a non-positive timeout means: try once and give up
            deadline = time.time() + max(timeout, 0)

        # os.mkdir is atomic: it fails with EEXIST while the lock is held
        while True:
            try:
                os.mkdir(lockdir)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise IOError("Failed to acquire Lock")
                if deadline is not None and time.time() >= deadline:
                    raise IOError("Failed to acquire Lock")
            # do not spin at full CPU speed while waiting for the lock
            time.sleep(0.01)

    try:
        stream = _open_pickle_file(filename, gzip)
        try:
            if gzip:
                # fail early if the file is not valid gzip data
                stream.readline()
                stream.seek(0)

            try:
                # NOTE(security): unpickling executes code embedded in the
                # file; only load files from trusted sources.
                this = Pickle.load(stream)
            except Exception:
                numeric = _import_numeric()
                if numeric is None:
                    # no legacy fallback available: report the real error
                    raise

                stream.close()
                stream = _open_pickle_file(filename, gzip)
                this = numeric.Unpickler(stream).load()
        finally:
            stream.close()
    finally:
        # release the lock even if reading has failed
        if lockdir is not None:
            try:
                os.rmdir(lockdir)
            except OSError:
                raise IOError('missing lockfile {0}'.format(lockdir))

    return this
661