Package csb :: Package io
[frames | no frames]

Source Code for Package csb.io

  1  """ 
  2  Generic I/O utility objects. 
  3   
  4  Here is a list of the most essential classes in this module: 
  5   
  6      1. temporary file system objects: L{TempFile}, L{TempFolder}  
  7      2. special/decorated streams: L{MemoryStream}, L{AutoFlushStream} 
  8      3. reusable stream readers and writers: L{EntryReader}, L{EntryWriter} 
  9      4. convenient communication with the shell: L{Shell} 
 10   
 11  In addition, csb.io is also part of the CSB compatibility layer. In order to 
 12  ensure cross-interpreter compatibility, always use the following csb.io objects: 
 13   
 14      - L{MemoryStream} instead of (c)StringIO 
 15      - csb.io.Pickle instead of pickle or cPickle 
 16      - csb.io.urllib instead of urllib or urllib.request 
 17       
 18  See also L{csb.core} for additional notes on compatibility.     
 19  """ 
 20   
 21  import os 
 22  import time 
 23  import errno 
 24  import shlex 
 25  import shutil 
 26  import tempfile 
 27  import subprocess 
 28   
 29  import csb.core 
 30   
 31   
 32  try: 
 33      from StringIO import StringIO 
 34  except ImportError: 
 35      from io import StringIO 
 36       
 37  try: 
 38      import cPickle as Pickle 
 39  except ImportError: 
 40      import pickle as Pickle 
 41   
 42  try: 
 43      import urllib.request as urllib 
 44  except ImportError: 
 45      import urllib2 as urllib 
 46           
 47   
 48  NEWLINE = "\n" 
class Shell(object):
    """
    Convenient communication with the shell: run an external command and
    collect its exit code, standard output and standard error.

    @cvar POLL: maximum pause in seconds between two checks on a running
                process when a timeout is requested
    @type POLL: float
    """

    POLL = 1.0

    @staticmethod
    def run(cmd, timeout=None):
        """
        Run a shell command and return the output.

        @param cmd: shell command with its arguments
        @type cmd: tuple or str
        @param timeout: maximum duration in seconds
        @type timeout: float or None

        @rtype: L{ShellInfo}
        @raise InvalidCommandError: on invalid executable
        @raise TimeoutError: when the timeout is expired
        """

        if isinstance(cmd, csb.core.string):
            cmd = shlex.split(cmd)

        try:
            cmd = tuple(cmd)
            start = time.time()
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            if timeout is not None:
                # poll() returns None while the process is alive and the exit
                # code afterwards; any exit code (not only 0) ends the wait.
                # NOTE(review): the pipes are not drained while polling, so a
                # very chatty process may block on a full pipe until the timeout.
                while process.poll() is None:
                    remaining = (start + timeout) - time.time()

                    if remaining <= 0:
                        try:
                            process.kill()
                            # reap the killed child so it does not linger as a zombie
                            process.wait()
                        except OSError:
                            # the process is already gone; nothing left to clean up
                            pass
                        raise TimeoutError(cmd, timeout)

                    # never sleep past the deadline
                    time.sleep(min(Shell.POLL, remaining))

            stdout, stderr = process.communicate()
            code = process.returncode

        except OSError as oe:
            if oe.errno == errno.ENOENT:
                # the executable does not exist
                raise InvalidCommandError(oe.strerror, cmd)
            else:
                raise

        return ShellInfo(code, stdout.decode() or '', stderr.decode() or '', cmd)

    @staticmethod
    def runstrict(cmd, timeout=None):
        """
        Same as L{Shell.run()}, but raises L{ProcessError} on bad exit code.

        @param cmd: shell command with its arguments
        @type cmd: tuple or str
        @param timeout: maximum duration in seconds
        @type timeout: float or None

        @rtype: L{ShellInfo}
        @raise ProcessError: on bad exit code
        @raise TimeoutError: when the timeout is expired
        """
        si = Shell.run(cmd, timeout=timeout)

        if si.code != 0:
            raise ProcessError(si)

        return si
122
class ProcessError(Exception):
    """
    Raised by L{Shell.runstrict()} when a command finishes with a bad
    (non-zero) exit code.

    @param context: execution info of the failed command
    @type context: L{ShellInfo}
    @param args: optional extra arguments, stored in the exception's C{args}
    """

    def __init__(self, context, *args):
        self.context = context
        # forward the extra arguments instead of dropping them and storing a
        # meaningless empty list in Exception.args
        super(ProcessError, self).__init__(context, *args)

    def __str__(self):
        return 'Bad exit code: #{0.code}'.format(self.context)
134
class TimeoutError(ProcessError):
    """
    Raised by L{Shell.run()} when the command is still running after the
    requested timeout.

    @param cmd: the command that was executed
    @type cmd: tuple
    @param timeout: the expired timeout in seconds
    @type timeout: float
    """

    def __init__(self, cmd, timeout):
        # no exit code or output exists for a process that never finished
        info = ShellInfo(None, '', '', cmd)
        self.timeout = timeout

        super(TimeoutError, self).__init__(info)

    def __str__(self):
        template = 'The process "{0.context.cmd}" did not finish in {0.timeout}s'
        return template.format(self)
148
class InvalidCommandError(ValueError):
    """
    Raised by L{Shell.run()} when starting the process fails with an
    OSError whose errno is 2.

    @param message: error description
    @type message: str
    @param cmd: the command that could not be executed; its first item is
                stored as C{program}
    @type cmd: tuple
    """

    def __init__(self, message, cmd):
        self.program = cmd[0]
        # iterable commands are flattened into a single command line string
        self.cmd = ' '.join(cmd) if csb.core.iterable(cmd) else cmd
        self.msg = message

        super(InvalidCommandError, self).__init__(message, self.cmd)

    def __str__(self):
        return self.msg
165
class ShellInfo(object):
    """
    Result of a shell command execution.

    @param code: exit code of the process
    @param stdout: captured standard output; a false value is stored as ''
    @param stderr: captured standard error; a false value is stored as ''
    @param cmd: the executed command as a sequence of strings; stored as a
                single space-separated string
    """

    def __init__(self, code, stdout, stderr, cmd):
        self.code = code
        self.stdout = stdout if stdout else ''
        self.stderr = stderr if stderr else ''
        self.cmd = ' '.join(cmd)
177
class MemoryStream(StringIO):
    """
    In-memory stream object which also works as a context manager: the
    stream is closed when the C{with} block is left.
    """

    def __enter__(self):
        return self

    def __exit__(self, *exc_info, **extras):
        # closing is best-effort: a failure here must not escape the with-block
        try:
            self.close()
        except:
            pass
191
class AutoFlushStream(csb.core.Proxy):
    """
    Proxy/decorator around a buffered stream which calls flush() on the
    wrapped stream after every write().

    @param stream: the stream object to wrap
    @type stream: file
    """

    def __init__(self, stream):
        super(AutoFlushStream, self).__init__(stream)

    def write(self, data):
        """
        Write C{data} to the wrapped stream and flush it immediately.
        """
        target = self._subject
        target.write(data)
        target.flush()
207
class TempFile(csb.core.Proxy):
    """
    Create a temporary file and take care of deleting it upon object
    destruction. The file can be opened multiple times on any platform, unlike
    the case with tempfile.NamedTemporaryFile (does not work on Windows).

        >>> with TempFile() as tmp:
                tmp.write(...)
                open(tmp.name)...

    @param dispose: automatically delete the file when the object is destroyed
    @type dispose: bool
    @param mode: file open mode (text, binary), default=t
    @type mode: str
    """

    def __init__(self, dispose=True, mode='t'):

        descriptor, path = tempfile.mkstemp()

        self.__file = path
        self.__fd = descriptor
        # a regular file handle opened for writing is the proxied subject
        self.__fh = open(self.__file, 'w' + mode)
        self.__mode = mode
        self.__dispose = bool(dispose)

        super(TempFile, self).__init__(self.__fh)

    def __del__(self):

        if not self.__dispose:
            return

        # best-effort cleanup: errors during destruction are ignored
        try:
            self.close()
        except:
            pass

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def close(self):
        """
        Flush, close and delete the file.
        """
        handle = self.__fh

        if not handle.closed:
            handle.flush()
            handle.close()
            # also release the descriptor obtained from mkstemp()
            os.close(self.__fd)

        path = self.__file

        if os.path.exists(path):
            os.remove(path)

    def content(self):
        """
        Flush pending writes and read the file back from disk.

        @return: the current content of the file.
        @rtype: str or bytes
        """
        self.flush()

        with open(self.name, 'r' + self.__mode) as reader:
            return reader.read()

    @property
    def name(self):
        """
        Full path and file name
        @rtype: str
        """
        return self.__file
279
class TempFolder(object):
    """
    Create a temporary directory which is wiped, together with everything
    in it, when the object is closed.

        >>> with TempFolder() as tmp:
                # put some files in tmp.name...

    @param dispose: automatically delete the folder and its contents when
                    the object is destroyed
    @type dispose: bool
    """

    def __init__(self, dispose=True):

        self.__name = os.path.abspath(tempfile.mkdtemp())
        self.__dispose = bool(dispose)

    def __del__(self):

        if not self.__dispose:
            return

        # best-effort cleanup: errors during destruction are ignored
        try:
            self.close()
        except:
            pass

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def close(self):
        """
        Delete the entire directory and its contents.
        """
        folder = self.name

        if os.path.exists(folder):
            shutil.rmtree(folder)

    @property
    def name(self):
        """
        Full directory name
        @rtype: str
        """
        return self.__name
327
class EntryReader(object):
    """
    Generic flatfile reader. Provides an iterable interface over the entries
    in the specified stream. The stream is rewound on construction and closed
    when the reader is destroyed.

    @param stream: the source data stream to read
    @type stream: file
    @param start_marker: a string marker which marks the beginning of a new entry
    @type start_marker: str
    @param end_marker: a string marker which signals the end of the file
    @type end_marker: str, None
    """

    def __init__(self, stream, start_marker, end_marker=None):

        if not hasattr(stream, 'seek') or not hasattr(stream, 'readline'):
            raise TypeError('The entry reader requires an opened stream.')

        stream.seek(0)
        self._stream = stream
        self._start_marker = None
        self._end_marker = None

        # go through the property setters so the markers are normalized
        self.start_marker = start_marker
        self.end_marker = end_marker

    @property
    def start_marker(self):
        return self._start_marker

    @start_marker.setter
    def start_marker(self, value):
        # anything but None is stored as a string
        self._start_marker = None if value is None else str(value)

    @property
    def end_marker(self):
        return self._end_marker

    @end_marker.setter
    def end_marker(self, value):
        # anything but None is stored as a string
        self._end_marker = None if value is None else str(value)

    def __del__(self):

        # best-effort: the stream may be missing or already closed
        try:
            self._stream.close()
        except:
            pass

    def entries(self):
        """
        Return an iterator over all entries from the stream/flat file. Each
        entry starts at a line beginning with the start marker and runs until
        the next such line, the end marker line or the end of the stream.
        Lines before the first start marker are skipped.

        @return: iterable over all entries read from the stream
        @rtype: generator
        """

        self._stream.seek(0)

        # None until the first start marker has been seen
        chunks = None

        while True:
            line = self._stream.readline()
            opens_entry = line.startswith(self.start_marker)

            if chunks is None:
                if opens_entry:
                    chunks = [line]
            elif opens_entry:
                # a new entry begins: hand out the previous one
                yield ''.join(chunks)
                chunks = [line]
            elif not line or line.strip() == self.end_marker:
                # end of stream or explicit end marker: emit the last entry
                yield ''.join(chunks)
                break
            else:
                chunks.append(line)

            if not line:
                break

    def readall(self):
        """
        Return a list of all entries in the stream.

        @rtype: list
        """
        return list(self.entries())
418
class EntryWriter(object):
    """
    A simple stream writer. The best way to use it is::

        with EntryWriter(output_file, close=True) as out:
            out.write(object)

    In this way the stream is automatically closed at the end of the block.

    @param destination: output file name or opened stream
    @type destination: str or stream
    @param newline: new line string (the default is L{csb.io.NEWLINE})
    @type newline: str
    @param close: if True (the default), the stream is automatically
                  closed when the object is destroyed or the C{with} block
                  is left; always enabled when C{destination} is a file name
    @type close: bool
    """

    def __init__(self, destination, close=True, newline=NEWLINE):

        self._stream = None
        self._newline = NEWLINE
        self._autoclose = True

        self.newline = newline
        self.autoclose = close

        is_filename = isinstance(destination, csb.core.string)

        if not is_filename and not hasattr(destination, 'write'):
            raise TypeError(destination)

        if is_filename:
            # a stream opened here is always closed by the writer
            self._stream = open(destination, 'w')
            self.autoclose = True
        else:
            self._stream = destination

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self.autoclose:
            self.close()

    def __del__(self):
        if self.autoclose:
            self.close()

    @property
    def stream(self):
        """
        Destination stream
        @rtype: stream
        """
        return self._stream

    @property
    def newline(self):
        return self._newline

    @newline.setter
    def newline(self, value):
        self._newline = str(value)

    @property
    def autoclose(self):
        return self._autoclose

    @autoclose.setter
    def autoclose(self, value):
        self._autoclose = bool(value)

    def close(self):
        """
        Close the destination stream. Errors while closing are ignored.
        """
        try:
            self._stream.close()
        except:
            pass

    def write(self, data):
        """
        Write a chunk of string data to the destination stream.
        """
        self._stream.write(data)

    def writeline(self, data):
        """
        Same as C{write}, but appends a newline character at the end.
        """
        out = self._stream
        out.write(data)
        out.write(self.newline)

    def writeall(self, entries, delimiter=NEWLINE):
        """
        Write all C{entries} to the destination stream, separating them with
        C{delimiter}

        @param entries: a collection of objects
        @type entries: iterable
        @param delimiter: append this string after each entry (the default is a
                          C{self.newline} character)
        @type delimiter: str
        """
        # the default delimiter stands for "whatever newline this writer uses"
        separator = self.newline if delimiter == NEWLINE else delimiter

        for entry in entries:
            self.write(entry)
            self.write(separator)
527
def dump(this, filename, gzip=False, lock=None, timeout=None):
    """
    Writes a python object to a file, using python cPickle
    Supports also '~' or '~user'.

    @param this: The object, which will be written to disk
    @type this: Any python object
    @param filename: Filename of the new file
    @type filename: String
    @param gzip: Use gzip to compress the file
    @type gzip: Boolean
    @param lock: Use a lockfile to restrict access; any value other than
                 None enables locking
    @param timeout: maximum time in seconds to wait for the lock; None
                    waits indefinitely, zero or less fails at once if the
                    lock is taken
    @type timeout: float or None

    @raise IOError: when the lock cannot be acquired or released
    """

    filename = os.path.expanduser(filename)
    lockdir = filename + '.lock'
    locked = lock is not None

    if locked:
        # a deadline is always defined when a timeout is given, including
        # zero or negative timeouts
        end_time = None if timeout is None else time.time() + timeout

        while True:
            try:
                # creating a directory is atomic, so it doubles as a lock
                os.mkdir(lockdir)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise IOError("Failed to acquire Lock")
                # File is already locked
                if end_time is not None and time.time() >= end_time:
                    raise IOError("Failed to acquire Lock")
            # avoid spinning at full speed while someone else holds the lock
            time.sleep(0.01)

    try:
        if gzip:
            import gzip as gzip_module
            stream = gzip_module.GzipFile(filename, 'wb')
        else:
            stream = open(filename, 'wb')

        try:
            if type(this).__name__ == 'array':
                import Numeric #@UnresolvedImport
                p = Numeric.Pickler(stream)
                p.dump(this)
            else:
                Pickle.dump(this, stream, 2)
        finally:
            stream.close()

    finally:
        # release the lock even when opening or pickling failed, otherwise
        # a stale lock directory would block every later call
        if locked:
            try:
                os.rmdir(lockdir)
            except OSError:
                raise IOError('missing lockfile {0}'.format(lockdir))
587
def load(filename, gzip=False, lock=None, timeout=None):
    """
    Unpickle an object from filename. Supports also '~' or '~user'.

    NOTE: unpickling can execute arbitrary code; only load files from
    trusted sources.

    @param filename: Filename pickled object
    @param gzip: Use gzip to uncompress the file
    @type gzip: Boolean
    @param lock: Use a lockfile to restrict access; any value other than
                 None enables locking
    @param timeout: maximum time in seconds to wait for the lock; None
                    waits indefinitely, zero or less fails at once if the
                    lock is taken
    @type timeout: float or None

    @return: Python object unpickled from file
    @raise IOError: when the lock cannot be acquired or released
    """

    filename = os.path.expanduser(filename)
    lockdir = filename + '.lock'
    locked = lock is not None

    def open_source():
        # open the file for reading, honouring the gzip flag
        if gzip:
            import gzip as gzip_module
            return gzip_module.GzipFile(filename)
        return open(filename, 'rb')

    if locked:
        # a deadline is always defined when a timeout is given, including
        # zero or negative timeouts
        end_time = None if timeout is None else time.time() + timeout

        while True:
            try:
                # creating a directory is atomic, so it doubles as a lock
                os.mkdir(lockdir)
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise IOError("Failed to acquire Lock")
                # File is already locked
                if end_time is not None and time.time() >= end_time:
                    raise IOError("Failed to acquire Lock")
            # avoid spinning at full speed while someone else holds the lock
            time.sleep(0.01)

    try:
        stream = open_source()

        try:
            if gzip:
                # fail early if the file is not valid gzip data
                stream.readline()
                stream.seek(0)

            try:
                this = Pickle.load(stream)
            except Exception as error:
                # legacy fallback: the file may hold a Numeric array pickle
                try:
                    import Numeric #@UnresolvedImport
                except ImportError:
                    Numeric = None

                if Numeric is None:
                    # no fallback available: report the real unpickling
                    # problem instead of a misleading ImportError
                    raise error

                stream.close()
                stream = open_source()
                this = Numeric.Unpickler(stream).load()
        finally:
            stream.close()

    finally:
        # release the lock even when reading failed, otherwise a stale lock
        # directory would block every later call
        if locked:
            try:
                os.rmdir(lockdir)
            except OSError:
                raise IOError('missing lockfile {0}'.format(lockdir))

    return this
661