Package paramiko :: Module sftp_file
[frames] | [no frames]

Source Code for Module paramiko.sftp_file

  1  # Copyright (C) 2003-2007  Robey Pointer <robeypointer@gmail.com> 
  2  # 
  3  # This file is part of paramiko. 
  4  # 
  5  # Paramiko is free software; you can redistribute it and/or modify it under the 
  6  # terms of the GNU Lesser General Public License as published by the Free 
  7  # Software Foundation; either version 2.1 of the License, or (at your option) 
  8  # any later version. 
  9  # 
 10  # Paramiko is distributed in the hope that it will be useful, but WITHOUT ANY 
 11  # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 
 12  # A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more 
 13  # details. 
 14  # 
 15  # You should have received a copy of the GNU Lesser General Public License 
 16  # along with Paramiko; if not, write to the Free Software Foundation, Inc., 
 17  # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA. 
 18   
 19  """ 
 20  L{SFTPFile} 
 21  """ 
 22   
 23  from __future__ import with_statement 
 24   
 25  from binascii import hexlify 
 26  from collections import deque 
 27  import socket 
 28  import threading 
 29  import time 
 30   
 31  from paramiko.common import * 
 32  from paramiko.sftp import * 
 33  from paramiko.file import BufferedFile 
 34  from paramiko.sftp_attr import SFTPAttributes 
 35   
 36   
class SFTPFile (BufferedFile):
    """
    Proxy object for a file on the remote server, in client mode SFTP.

    Instances of this class may be used as context managers in the same way
    that built-in Python file objects are.
    """

    # Some sftp servers will choke if you send read/write requests larger than
    # this size.
    MAX_REQUEST_SIZE = 32768

    def __init__(self, sftp, handle, mode='r', bufsize=-1):
        """
        Create a proxy for an already-open remote file.

        @param sftp: the owning SFTP client session
        @param handle: the server-issued file handle (passed back verbatim
            in every request on this file)
        @param mode: python-style file mode string
        @param bufsize: buffering policy, as for python's built-in file()
        """
        BufferedFile.__init__(self)
        self.sftp = sftp
        self.handle = handle
        BufferedFile._set_mode(self, mode, bufsize)
        # when True, writes don't wait for a server response (see
        # set_pipelined)
        self.pipelined = False
        # prefetch state: received data keyed by file offset, and pending
        # request extents (offset, length) keyed by request number
        self._prefetching = False
        self._prefetch_done = False
        self._prefetch_data = {}
        self._prefetch_extents = {}
        self._prefetch_lock = threading.Lock()
        # exception saved by _async_response, re-raised on the next file op
        self._saved_exception = None
        # outstanding pipelined write requests, in issue order
        self._reqs = deque()
62
    def __del__(self):
        # GC finalization may run from any thread: close without waiting
        # for the server's response (async=True).
        self._close(async=True)
65
    def close(self):
        # Explicit close: wait for the server's CMD_CLOSE response.
        self._close(async=False)
68
    def _close(self, async=False):
        """
        Close the remote handle exactly once.

        @param async: if True (the __del__ path), send CMD_CLOSE without
            waiting for a response; if False, block until the server
            acknowledges the close.
        @type async: bool
        """
        # We allow double-close without signaling an error, because real
        # Python file objects do.  However, we must protect against actually
        # sending multiple CMD_CLOSE packets, because after we close our
        # handle, the same handle may be re-allocated by the server, and we
        # may end up mysteriously closing some random other file.  (This is
        # especially important because we unconditionally call close() from
        # __del__.)
        if self._closed:
            return
        self.sftp._log(DEBUG, 'close(%s)' % hexlify(self.handle))
        if self.pipelined:
            # drain any outstanding pipelined-write responses first
            self.sftp._finish_responses(self)
        BufferedFile.close(self)
        try:
            if async:
                # GC'd file handle could be called from an arbitrary thread -- don't wait for a response
                self.sftp._async_request(type(None), CMD_CLOSE, self.handle)
            else:
                self.sftp._request(CMD_CLOSE, self.handle)
        except EOFError:
            # may have outlived the Transport connection
            pass
        except (IOError, socket.error):
            # may have outlived the Transport connection
            pass
95
96 - def _data_in_prefetch_requests(self, offset, size):
97 k = [x for x in self._prefetch_extents.values() if x[0] <= offset] 98 if len(k) == 0: 99 return False 100 k.sort(lambda x, y: cmp(x[0], y[0])) 101 buf_offset, buf_size = k[-1] 102 if buf_offset + buf_size <= offset: 103 # prefetch request ends before this one begins 104 return False 105 if buf_offset + buf_size >= offset + size: 106 # inclusive 107 return True 108 # well, we have part of the request. see if another chunk has the rest. 109 return self._data_in_prefetch_requests(buf_offset + buf_size, offset + size - buf_offset - buf_size)
110
111 - def _data_in_prefetch_buffers(self, offset):
112 """ 113 if a block of data is present in the prefetch buffers, at the given 114 offset, return the offset of the relevant prefetch buffer. otherwise, 115 return None. this guarantees nothing about the number of bytes 116 collected in the prefetch buffer so far. 117 """ 118 k = [i for i in self._prefetch_data.keys() if i <= offset] 119 if len(k) == 0: 120 return None 121 index = max(k) 122 buf_offset = offset - index 123 if buf_offset >= len(self._prefetch_data[index]): 124 # it's not here 125 return None 126 return index
127
    def _read_prefetch(self, size):
        """
        Read data out of the prefetch buffer, if possible.  If the data isn't
        in the buffer, return None.  Otherwise, behaves like a normal read.
        """
        # while not closed, and haven't fetched past the current position, and haven't reached EOF...
        while True:
            offset = self._data_in_prefetch_buffers(self._realpos)
            if offset is not None:
                # a buffer covering the current position has arrived
                break
            if self._prefetch_done or self._closed:
                break
            # pump one server response; it may deliver a prefetch buffer
            self.sftp._read_response()
            self._check_exception()
        if offset is None:
            self._prefetching = False
            return None
        prefetch = self._prefetch_data[offset]
        del self._prefetch_data[offset]

        buf_offset = self._realpos - offset
        if buf_offset > 0:
            # put back the part of the buffer before the current position so
            # it remains readable later (reads may be in random order)
            self._prefetch_data[offset] = prefetch[:buf_offset]
            prefetch = prefetch[buf_offset:]
        if size < len(prefetch):
            # keep the tail beyond the requested size for a later read
            self._prefetch_data[self._realpos + size] = prefetch[size:]
            prefetch = prefetch[:size]
        return prefetch
156
157 - def _read(self, size):
158 size = min(size, self.MAX_REQUEST_SIZE) 159 if self._prefetching: 160 data = self._read_prefetch(size) 161 if data is not None: 162 return data 163 t, msg = self.sftp._request(CMD_READ, self.handle, long(self._realpos), int(size)) 164 if t != CMD_DATA: 165 raise SFTPError('Expected data') 166 return msg.get_string()
167
    def _write(self, data):
        """
        Write a chunk of ``data`` at the current position and return the
        number of bytes actually written.
        """
        # may write less than requested if it would exceed max packet size
        chunk = min(len(data), self.MAX_REQUEST_SIZE)
        self._reqs.append(self.sftp._async_request(type(None), CMD_WRITE, self.handle, long(self._realpos), str(data[:chunk])))
        # not pipelined: wait for every response now.  pipelined: drain
        # responses only once a backlog builds up and the server already has
        # data ready for us to read.
        if not self.pipelined or (len(self._reqs) > 100 and self.sftp.sock.recv_ready()):
            while len(self._reqs):
                req = self._reqs.popleft()
                t, msg = self.sftp._read_response(req)
                if t != CMD_STATUS:
                    raise SFTPError('Expected status')
                # convert_status already called
        return chunk
180
181 - def settimeout(self, timeout):
182 """ 183 Set a timeout on read/write operations on the underlying socket or 184 ssh L{Channel}. 185 186 @see: L{Channel.settimeout} 187 @param timeout: seconds to wait for a pending read/write operation 188 before raising C{socket.timeout}, or C{None} for no timeout 189 @type timeout: float 190 """ 191 self.sftp.sock.settimeout(timeout)
192
193 - def gettimeout(self):
194 """ 195 Returns the timeout in seconds (as a float) associated with the socket 196 or ssh L{Channel} used for this file. 197 198 @see: L{Channel.gettimeout} 199 @rtype: float 200 """ 201 return self.sftp.sock.gettimeout()
202
    def setblocking(self, blocking):
        """
        Set blocking or non-blocking mode on the underlying socket or ssh
        L{Channel}.

        @see: L{Channel.setblocking}
        @param blocking: 0 to set non-blocking mode; non-0 to set blocking
            mode.
        @type blocking: int
        """
        self.sftp.sock.setblocking(blocking)
214
215 - def seek(self, offset, whence=0):
216 self.flush() 217 if whence == self.SEEK_SET: 218 self._realpos = self._pos = offset 219 elif whence == self.SEEK_CUR: 220 self._pos += offset 221 self._realpos = self._pos 222 else: 223 self._realpos = self._pos = self._get_size() + offset 224 self._rbuffer = ''
225
226 - def stat(self):
227 """ 228 Retrieve information about this file from the remote system. This is 229 exactly like L{SFTP.stat}, except that it operates on an already-open 230 file. 231 232 @return: an object containing attributes about this file. 233 @rtype: SFTPAttributes 234 """ 235 t, msg = self.sftp._request(CMD_FSTAT, self.handle) 236 if t != CMD_ATTRS: 237 raise SFTPError('Expected attributes') 238 return SFTPAttributes._from_msg(msg)
239
240 - def chmod(self, mode):
241 """ 242 Change the mode (permissions) of this file. The permissions are 243 unix-style and identical to those used by python's C{os.chmod} 244 function. 245 246 @param mode: new permissions 247 @type mode: int 248 """ 249 self.sftp._log(DEBUG, 'chmod(%s, %r)' % (hexlify(self.handle), mode)) 250 attr = SFTPAttributes() 251 attr.st_mode = mode 252 self.sftp._request(CMD_FSETSTAT, self.handle, attr)
253
254 - def chown(self, uid, gid):
255 """ 256 Change the owner (C{uid}) and group (C{gid}) of this file. As with 257 python's C{os.chown} function, you must pass both arguments, so if you 258 only want to change one, use L{stat} first to retrieve the current 259 owner and group. 260 261 @param uid: new owner's uid 262 @type uid: int 263 @param gid: new group id 264 @type gid: int 265 """ 266 self.sftp._log(DEBUG, 'chown(%s, %r, %r)' % (hexlify(self.handle), uid, gid)) 267 attr = SFTPAttributes() 268 attr.st_uid, attr.st_gid = uid, gid 269 self.sftp._request(CMD_FSETSTAT, self.handle, attr)
270
271 - def utime(self, times):
272 """ 273 Set the access and modified times of this file. If 274 C{times} is C{None}, then the file's access and modified times are set 275 to the current time. Otherwise, C{times} must be a 2-tuple of numbers, 276 of the form C{(atime, mtime)}, which is used to set the access and 277 modified times, respectively. This bizarre API is mimicked from python 278 for the sake of consistency -- I apologize. 279 280 @param times: C{None} or a tuple of (access time, modified time) in 281 standard internet epoch time (seconds since 01 January 1970 GMT) 282 @type times: tuple(int) 283 """ 284 if times is None: 285 times = (time.time(), time.time()) 286 self.sftp._log(DEBUG, 'utime(%s, %r)' % (hexlify(self.handle), times)) 287 attr = SFTPAttributes() 288 attr.st_atime, attr.st_mtime = times 289 self.sftp._request(CMD_FSETSTAT, self.handle, attr)
290
291 - def truncate(self, size):
292 """ 293 Change the size of this file. This usually extends 294 or shrinks the size of the file, just like the C{truncate()} method on 295 python file objects. 296 297 @param size: the new size of the file 298 @type size: int or long 299 """ 300 self.sftp._log(DEBUG, 'truncate(%s, %r)' % (hexlify(self.handle), size)) 301 attr = SFTPAttributes() 302 attr.st_size = size 303 self.sftp._request(CMD_FSETSTAT, self.handle, attr)
304
305 - def check(self, hash_algorithm, offset=0, length=0, block_size=0):
306 """ 307 Ask the server for a hash of a section of this file. This can be used 308 to verify a successful upload or download, or for various rsync-like 309 operations. 310 311 The file is hashed from C{offset}, for C{length} bytes. If C{length} 312 is 0, the remainder of the file is hashed. Thus, if both C{offset} 313 and C{length} are zero, the entire file is hashed. 314 315 Normally, C{block_size} will be 0 (the default), and this method will 316 return a byte string representing the requested hash (for example, a 317 string of length 16 for MD5, or 20 for SHA-1). If a non-zero 318 C{block_size} is given, each chunk of the file (from C{offset} to 319 C{offset + length}) of C{block_size} bytes is computed as a separate 320 hash. The hash results are all concatenated and returned as a single 321 string. 322 323 For example, C{check('sha1', 0, 1024, 512)} will return a string of 324 length 40. The first 20 bytes will be the SHA-1 of the first 512 bytes 325 of the file, and the last 20 bytes will be the SHA-1 of the next 512 326 bytes. 327 328 @param hash_algorithm: the name of the hash algorithm to use (normally 329 C{"sha1"} or C{"md5"}) 330 @type hash_algorithm: str 331 @param offset: offset into the file to begin hashing (0 means to start 332 from the beginning) 333 @type offset: int or long 334 @param length: number of bytes to hash (0 means continue to the end of 335 the file) 336 @type length: int or long 337 @param block_size: number of bytes to hash per result (must not be less 338 than 256; 0 means to compute only one hash of the entire segment) 339 @type block_size: int 340 @return: string of bytes representing the hash of each block, 341 concatenated together 342 @rtype: str 343 344 @note: Many (most?) servers don't support this extension yet. 
345 346 @raise IOError: if the server doesn't support the "check-file" 347 extension, or possibly doesn't support the hash algorithm 348 requested 349 350 @since: 1.4 351 """ 352 t, msg = self.sftp._request(CMD_EXTENDED, 'check-file', self.handle, 353 hash_algorithm, long(offset), long(length), block_size) 354 ext = msg.get_string() 355 alg = msg.get_string() 356 data = msg.get_remainder() 357 return data
358
359 - def set_pipelined(self, pipelined=True):
360 """ 361 Turn on/off the pipelining of write operations to this file. When 362 pipelining is on, paramiko won't wait for the server response after 363 each write operation. Instead, they're collected as they come in. 364 At the first non-write operation (including L{close}), all remaining 365 server responses are collected. This means that if there was an error 366 with one of your later writes, an exception might be thrown from 367 within L{close} instead of L{write}. 368 369 By default, files are I{not} pipelined. 370 371 @param pipelined: C{True} if pipelining should be turned on for this 372 file; C{False} otherwise 373 @type pipelined: bool 374 375 @since: 1.5 376 """ 377 self.pipelined = pipelined
378
379 - def prefetch(self):
380 """ 381 Pre-fetch the remaining contents of this file in anticipation of 382 future L{read} calls. If reading the entire file, pre-fetching can 383 dramatically improve the download speed by avoiding roundtrip latency. 384 The file's contents are incrementally buffered in a background thread. 385 386 The prefetched data is stored in a buffer until read via the L{read} 387 method. Once data has been read, it's removed from the buffer. The 388 data may be read in a random order (using L{seek}); chunks of the 389 buffer that haven't been read will continue to be buffered. 390 391 @since: 1.5.1 392 """ 393 size = self.stat().st_size 394 # queue up async reads for the rest of the file 395 chunks = [] 396 n = self._realpos 397 while n < size: 398 chunk = min(self.MAX_REQUEST_SIZE, size - n) 399 chunks.append((n, chunk)) 400 n += chunk 401 if len(chunks) > 0: 402 self._start_prefetch(chunks)
403
404 - def readv(self, chunks):
405 """ 406 Read a set of blocks from the file by (offset, length). This is more 407 efficient than doing a series of L{seek} and L{read} calls, since the 408 prefetch machinery is used to retrieve all the requested blocks at 409 once. 410 411 @param chunks: a list of (offset, length) tuples indicating which 412 sections of the file to read 413 @type chunks: list(tuple(long, int)) 414 @return: a list of blocks read, in the same order as in C{chunks} 415 @rtype: list(str) 416 417 @since: 1.5.4 418 """ 419 self.sftp._log(DEBUG, 'readv(%s, %r)' % (hexlify(self.handle), chunks)) 420 421 read_chunks = [] 422 for offset, size in chunks: 423 # don't fetch data that's already in the prefetch buffer 424 if self._data_in_prefetch_buffers(offset) or self._data_in_prefetch_requests(offset, size): 425 continue 426 427 # break up anything larger than the max read size 428 while size > 0: 429 chunk_size = min(size, self.MAX_REQUEST_SIZE) 430 read_chunks.append((offset, chunk_size)) 431 offset += chunk_size 432 size -= chunk_size 433 434 self._start_prefetch(read_chunks) 435 # now we can just devolve to a bunch of read()s :) 436 for x in chunks: 437 self.seek(x[0]) 438 yield self.read(x[1])
439 440 441 ### internals... 442 443
444 - def _get_size(self):
445 try: 446 return self.stat().st_size 447 except: 448 return 0
449
450 - def _start_prefetch(self, chunks):
451 self._prefetching = True 452 self._prefetch_done = False 453 454 t = threading.Thread(target=self._prefetch_thread, args=(chunks,)) 455 t.setDaemon(True) 456 t.start()
457
    def _prefetch_thread(self, chunks):
        """
        Issue one async CMD_READ per (offset, length) chunk.
        """
        # do these read requests in a temporary thread because there may be
        # a lot of them, so it may block.
        for offset, length in chunks:
            with self._prefetch_lock:
                # hold the lock while recording the extent so the response
                # handler can't see a request number before its extent exists
                num = self.sftp._async_request(self, CMD_READ, self.handle, long(offset), int(length))
                self._prefetch_extents[num] = (offset, length)
465
    def _async_response(self, t, msg, num):
        """
        Handle the server's response to an async prefetch read request
        numbered ``num``.
        """
        if t == CMD_STATUS:
            # save exception and re-raise it on next file operation
            try:
                self.sftp._convert_status(msg)
            except Exception, x:
                self._saved_exception = x
            return
        if t != CMD_DATA:
            raise SFTPError('Expected data')
        data = msg.get_string()
        with self._prefetch_lock:
            # file the data under the offset recorded for this request, and
            # drop the extent; prefetch is done when no extents remain
            offset, length = self._prefetch_extents[num]
            self._prefetch_data[offset] = data
            del self._prefetch_extents[num]
            if len(self._prefetch_extents) == 0:
                self._prefetch_done = True
483
484 - def _check_exception(self):
485 "if there's a saved exception, raise & clear it" 486 if self._saved_exception is not None: 487 x = self._saved_exception 488 self._saved_exception = None 489 raise x
490
491 - def __enter__(self):
492 return self
493
494 - def __exit__(self, type, value, traceback):
495 self.close()
496