Package cloudfiles :: Module storage_object
[frames | no frames]

Source Code for Module cloudfiles.storage_object

  1  """ 
  2  Object operations 
  3   
  4  An Object is analogous to a file on a conventional filesystem. You can 
  5  read data from, or write data to your Objects. You can also associate 
  6  arbitrary metadata with them. 
  7   
  8  See COPYING for license information. 
  9  """ 
 10   
 11  try: 
 12      from hashlib import md5 
 13  except ImportError: 
 14      from md5 import md5 
 15  import StringIO 
 16  import mimetypes 
 17  import os 
 18   
 19  from urllib  import quote 
 20  from errors  import ResponseError, NoSuchObject, \ 
 21                      InvalidObjectName, IncompleteSend, \ 
 22                      InvalidMetaName, InvalidMetaValue 
 23   
 24  from socket  import timeout 
 25  import consts 
 26  from utils   import unicode_quote, requires_name 
# Because HTTPResponse objects *have* to have read() called on them
# before they can be used again ...
# pylint: disable-msg=W0612


class Object(object):
    """
    Storage data representing an object, (metadata and data).

    @undocumented: _make_headers
    @undocumented: _name_check
    @undocumented: _initialize
    @undocumented: compute_md5sum
    @undocumented: __get_conn_for_write
    @undocumented: _set_etag_from_response
    @ivar name: the object's name (generally treat as read-only)
    @type name: str
    @ivar content_type: the object's content-type (set or read)
    @type content_type: str
    @ivar metadata: metadata associated with the object (set or read)
    @type metadata: dict
    @ivar size: the object's size (cached)
    @type size: number
    @ivar last_modified: date and time of last file modification (cached)
    @type last_modified: str
    @ivar container: the object's container (generally treat as read-only)
    @type container: L{Container}
    """
    # R/O support of the legacy objsum attr.
    objsum = property(lambda self: self._etag)

    def __set_etag(self, value):
        """
        Setter behind the C{etag} property.

        Stores the value and flags it as manually assigned, so that write(),
        send() and the copy methods do not clear it before building headers.
        """
        self._etag = value
        self._etag_override = True

    etag = property(lambda self: self._etag, __set_etag)

    def __init__(self, container, name=None,
                 force_exists=False, object_record=None):
        """
        Storage objects rarely if ever need to be instantiated directly by the
        user.

        Instead, use the L{create_object<Container.create_object>},
        L{get_object<Container.get_object>},
        L{list_objects<Container.list_objects>} and other
        methods on its parent L{Container} object.

        @param container: the container this object belongs to
        @type container: L{Container}
        @param name: the object's name; ignored when object_record is given
        @type name: str
        @param force_exists: raise L{NoSuchObject} when the remote HEAD
                             lookup does not find the object
        @type force_exists: boolean
        @param object_record: a listing record with the keys 'name',
                              'content_type', 'bytes', 'last_modified' and
                              'hash'; when given, no remote lookup is made
        @type object_record: dict
        """
        self.container = container
        self.last_modified = None
        self.metadata = {}
        if object_record:
            self.name = object_record['name']
            self.content_type = object_record['content_type']
            self.size = object_record['bytes']
            self.last_modified = object_record['last_modified']
            self._etag = object_record['hash']
            self._etag_override = False
        else:
            self.name = name
            self.content_type = None
            self.size = None
            self._etag = None
            self._etag_override = False
            if not self._initialize() and force_exists:
                raise NoSuchObject(self.name)

    @requires_name(InvalidObjectName)
    def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
        """
        Read the content from the remote storage object.

        By default this method will buffer the response in memory and
        return it as a string. However, if a file-like object is passed
        in using the buffer keyword, the response will be written to it
        instead.

        A callback can be passed in for reporting on the progress of
        the download. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer. Note: This option is only
        applicable when used in conjunction with the buffer option.

        >>> test_object.write('hello')
        >>> test_object.read()
        'hello'

        @param size: combined with offset, defines the length of data to be
                     read; a Range header is only sent when size > 0
        @type size: number
        @param offset: combined with size, defines the start location to be
                       read
        @type offset: number
        @param hdrs: an optional dict of headers to send with the request;
                     it is never modified by this method
        @type hdrs: dictionary
        @param buffer: an optional file-like object to write the content to
        @type buffer: file-like object
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @rtype: str or None
        @return: a string of all data in the object, or None if a buffer is
                 used
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if size > 0:
            # Add the Range header to a copy so the caller's dict is left
            # untouched.
            hdrs = dict(hdrs or {})
            hdrs['Range'] = 'bytes=%d-%d' % (offset, (offset + size) - 1)
        response = self.container.conn.make_request('GET',
                path=[self.container.name, self.name], hdrs=hdrs)
        if (response.status < 200) or (response.status > 299):
            # Drain the body before raising (see the note above the class).
            response.read()
            raise ResponseError(response.status, response.reason)

        if hasattr(buffer, 'write'):
            scratch = response.read(8192)
            transferred = 0

            while len(scratch) > 0:
                buffer.write(scratch)
                transferred += len(scratch)
                if callable(callback):
                    callback(transferred, self.size)
                scratch = response.read(8192)
            return None
        else:
            return response.read()

    def save_to_filename(self, filename, callback=None):
        """
        Save the contents of the object to filename.

        >>> container = connection['container1']
        >>> obj = container.get_object('backup_file')
        >>> obj.save_to_filename('./backup_file')

        @param filename: name of the file
        @type filename: str
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        fobj = open(filename, 'wb')
        try:
            self.read(buffer=fobj, callback=callback)
        finally:
            fobj.close()

    @requires_name(InvalidObjectName)
    def stream(self, chunksize=8192, hdrs=None):
        """
        Return a generator of the remote storage object's data.

        Warning: The HTTP response is only complete after this generator
        has raised a StopIteration. No other methods can be called until
        this has occurred.

        >>> test_object.write('hello')
        >>> test_object.stream()
        <generator object at 0xb77939cc>
        >>> '-'.join(test_object.stream(chunksize=1))
        'h-e-l-l-o'

        @param chunksize: size in bytes yielded by the generator
        @type chunksize: number
        @param hdrs: an optional dict of headers to send in the request
        @type hdrs: dict
        @rtype: str generator
        @return: a generator which yields strings as the object is downloaded
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        response = self.container.conn.make_request('GET',
                path=[self.container.name, self.name], hdrs=hdrs)
        if response.status < 200 or response.status > 299:
            response.read()
            raise ResponseError(response.status, response.reason)
        buff = response.read(chunksize)
        while len(buff) > 0:
            yield buff
            buff = response.read(chunksize)
        # I hate you httplib
        # (one final read() after the data is exhausted)
        response.read()

    @requires_name(InvalidObjectName)
    def sync_metadata(self):
        """
        Commits the metadata to the remote storage system.

        >>> test_object = container['paradise_lost.pdf']
        >>> test_object.metadata = {'author': 'John Milton'}
        >>> test_object.sync_metadata()

        Object metadata can be set and retrieved through the object's
        .metadata attribute.

        Nothing is sent when the metadata dict is empty.

        @raise ResponseError: if the server does not answer with status 202
        """
        self._name_check()
        if self.metadata:
            headers = self._make_headers()
            headers['Content-Length'] = "0"
            response = self.container.conn.make_request(
                'POST', [self.container.name, self.name], hdrs=headers,
                data='')
            response.read()
            if response.status != 202:
                raise ResponseError(response.status, response.reason)

    def __get_conn_for_write(self):
        """
        Start a PUT request for this object on the connection's underlying
        http connection and return that connection, headers already sent,
        ready for the body to be streamed with send().
        """
        headers = self._make_headers()

        headers['X-Auth-Token'] = self.container.conn.token

        # Quote with unicode_quote, as send() does when it builds this same
        # path, so both upload paths treat object names alike.
        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                unicode_quote(self.container.name), unicode_quote(self.name))

        # Requests are handled a little differently for writes ...
        http = self.container.conn.connection

        # TODO: more/better exception handling please
        http.putrequest('PUT', path)
        for hdr in headers:
            http.putheader(hdr, headers[hdr])
        http.putheader('User-Agent', self.container.conn.user_agent)
        http.endheaders()
        return http

    def _set_etag_from_response(self, response):
        """
        Copy the ETag header of response, if present, into the cached etag
        without marking it as manually assigned.
        """
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

    # pylint: disable-msg=W0622
    @requires_name(InvalidObjectName)
    def write(self, data='', verify=True, callback=None):
        """
        Write data to the remote storage system.

        By default, server-side verification is enabled, (verify=True), and
        end-to-end verification is performed using an md5 checksum. When
        verification is disabled, (verify=False), the etag attribute will
        be set to the value returned by the server, not one calculated
        locally. When disabling verification, there is no guarantee that
        what you think was uploaded matches what was actually stored. Use
        this option carefully. You have been warned.

        Note: with verify=True and no manually assigned etag, the md5 of
        the bytes sent is stored in the etag attribute after a successful
        upload; this method does not itself compare that digest with the
        ETag header returned by the server.

        A callback can be passed in for reporting on the progress of
        the upload. The callback should accept two integers, the first
        will be for the amount of data written so far, the second for
        the total size of the transfer.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> fp = open('./file.txt')
        >>> test_object.write(fp)

        @param data: the data to be written
        @type data: str or file
        @param verify: enable/disable server-side checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()
        if isinstance(data, file):
            # pylint: disable-msg=E1101
            try:
                data.flush()
            except IOError:
                pass  # If the file descriptor is read-only this will fail
            self.size = int(os.fstat(data.fileno())[6])
        else:
            data = StringIO.StringIO(data)
            self.size = data.len

        # If override is set (and _etag is not None), then the etag has
        # been manually assigned and we will not calculate our own.

        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            # pylint: disable-msg=E1101
            guessed = None
            if hasattr(data, 'name'):
                guessed = mimetypes.guess_type(data.name)[0]
            self.content_type = guessed or 'application/octet-stream'

        http = self.__get_conn_for_write()

        response = None
        transferred = 0
        running_checksum = md5()

        buff = data.read(4096)
        try:
            while len(buff) > 0:
                http.send(buff)
                # Count the chunk that was just sent, so the callback sees
                # the real running total and ends exactly at self.size.
                transferred += len(buff)
                if verify and not self._etag_override:
                    running_checksum.update(buff)
                if callable(callback):
                    callback(transferred, self.size)
                buff = data.read(4096)
            response = http.getresponse()
            response.read()
        except timeout:
            if response is not None:
                # pylint: disable-msg=E1101
                response.read()
            # Bare raise re-raises the timeout with its original traceback.
            raise
        else:
            if verify and not self._etag_override:
                self._etag = running_checksum.hexdigest()

        # ----------------------------------------------------------------

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        # If verification has been disabled for this write, then set the
        # instances etag attribute to what the server returns to us.
        if not verify:
            self._set_etag_from_response(response)

    @requires_name(InvalidObjectName)
    def copy_to(self, container_name, name):
        """
        Copy an object's contents to another location.

        @param container_name: name of the destination container
        @type container_name: str
        @param name: name of the destination object
        @type name: str
        @raise ResponseError: if the server answers with a non-2xx status
        """

        self._name_check()
        self._name_check(name)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        headers = self._make_headers()
        headers['Destination'] = "%s/%s" % (container_name, name)
        headers['Content-Length'] = 0
        response = self.container.conn.make_request(
            'COPY', [self.container.name, self.name], hdrs=headers, data='')
        response.read()

        if response.status < 200 or response.status > 299:
            raise ResponseError(response.status, response.reason)

        # Reset the etag to what the server returns.
        self._set_etag_from_response(response)

    @requires_name(InvalidObjectName)
    def copy_from(self, container_name, name):
        """
        Copy another object's contents to this object.

        @param container_name: name of the source container
        @type container_name: str
        @param name: name of the source object
        @type name: str
        @raise ResponseError: if the server answers with a non-2xx status
        """

        self._name_check()
        self._name_check(name)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        headers = self._make_headers()
        headers['X-Copy-From'] = "%s/%s" % (container_name, name)
        headers['Content-Length'] = 0
        response = self.container.conn.make_request(
            'PUT', [self.container.name, self.name], hdrs=headers, data='')
        response.read()

        if response.status < 200 or response.status > 299:
            raise ResponseError(response.status, response.reason)

        # Reset the etag to what the server returns.
        self._set_etag_from_response(response)

    @requires_name(InvalidObjectName)
    def send(self, iterable):
        """
        Write potentially transient data to the remote storage system using a
        generator or stream.

        If the object's size is not set, chunked transfer encoding will be
        used to upload the file.

        If the object's size attribute is set, it will be used as the
        Content-Length. If the generator raises StopIteration prior to
        yielding the right number of bytes, an IncompleteSend exception is
        raised.

        If the content_type attribute is not set then a value of
        application/octet-stream will be used.

        Server-side verification will be performed if an md5 checksum is
        assigned to the etag property before calling this method,
        otherwise no verification will be performed, (verification
        can be performed afterward though by using the etag attribute
        which is set to the value returned by the server).

        >>> test_object = container.create_object('backup.tar.gz')
        >>> pfd = os.popen('tar -czvf - ./data/', 'r')
        >>> test_object.send(pfd)

        @param iterable: stream or generator which yields the content to upload
        @type iterable: generator or stream
        @raise IncompleteSend: if size is set and fewer bytes were yielded
        @raise ResponseError: if the server answers with a non-2xx status
        """
        self._name_check()

        if hasattr(iterable, 'read'):

            def file_iterator(fobj):
                # Ends by falling off the end; an explicit
                # "raise StopIteration()" inside a generator is redundant
                # and is turned into a RuntimeError by PEP 479.
                chunk = fobj.read(4095)
                while chunk:
                    yield chunk
                    chunk = fobj.read(4095)
            iterable = file_iterator(iterable)

        # This method implicitly disables verification.
        if not self._etag_override:
            self._etag = None

        if not self.content_type:
            self.content_type = 'application/octet-stream'

        path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                unicode_quote(self.container.name), unicode_quote(self.name))
        headers = self._make_headers()
        if self.size is None:
            del headers['Content-Length']
            headers['Transfer-Encoding'] = 'chunked'
        headers['X-Auth-Token'] = self.container.conn.token
        headers['User-Agent'] = self.container.conn.user_agent
        http = self.container.conn.connection
        http.putrequest('PUT', path)
        for key in headers:
            http.putheader(key, headers[key])
        http.endheaders()

        response = None
        transferred = 0
        try:
            for chunk in iterable:
                if not chunk:
                    # In chunked mode a zero-length chunk is the
                    # end-of-body marker, so an empty item must not be
                    # framed and sent.
                    continue
                if self.size is None:
                    http.send("%X\r\n" % len(chunk))
                    http.send(chunk)
                    http.send("\r\n")
                else:
                    http.send(chunk)
                transferred += len(chunk)
            if self.size is None:
                http.send("0\r\n\r\n")
            # If the generator didn't yield enough data, stop, drop, and roll.
            elif transferred < self.size:
                raise IncompleteSend()
            response = http.getresponse()
            response.read()
        except timeout:
            if response is not None:
                # pylint: disable-msg=E1101
                response.read()
            # Bare raise re-raises the timeout with its original traceback.
            raise

        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)

        self._set_etag_from_response(response)

    def load_from_filename(self, filename, verify=True, callback=None):
        """
        Put the contents of the named file into remote storage.

        >>> test_object = container.create_object('file.txt')
        >>> test_object.content_type = 'text/plain'
        >>> test_object.load_from_filename('./my_file.txt')

        @param filename: path to the file
        @type filename: str
        @param verify: enable/disable server-side checksum verification
        @type verify: boolean
        @param callback: function to be used as a progress callback
        @type callback: callable(transferred, size)
        """
        fobj = open(filename, 'rb')
        try:
            self.write(fobj, verify=verify, callback=callback)
        finally:
            # Close the file even when the upload raises.
            fobj.close()

    def _initialize(self):
        """
        Initialize the Object with values from the remote service (if any).

        @rtype: boolean
        @return: False when the object has no name or the HEAD request
                 returns 404, True once the attributes have been loaded
        @raise ResponseError: for any other non-2xx status
        """
        if not self.name:
            return False

        response = self.container.conn.make_request(
                'HEAD', [self.container.name, self.name])
        response.read()
        if response.status == 404:
            return False
        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)
        for hdr in response.getheaders():
            if hdr[0].lower() == 'content-type':
                self.content_type = hdr[1]
            if hdr[0].lower().startswith('x-object-meta-'):
                # 14 == len('x-object-meta-'); keep only the metadata key.
                self.metadata[hdr[0][14:]] = hdr[1]
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]
                self._etag_override = False
            if hdr[0].lower() == 'content-length':
                self.size = int(hdr[1])
            if hdr[0].lower() == 'last-modified':
                self.last_modified = hdr[1]
        return True

    def __str__(self):
        """
        Return the object's name, or an empty string when no name is set.
        """
        # __init__ allows name=None; returning None from __str__ would make
        # str() raise TypeError.
        if self.name is None:
            return ''
        return self.name

    def _name_check(self, name=None):
        """
        Raise L{InvalidObjectName} if name (default: this object's name) is
        longer than consts.object_name_limit.
        """
        if name is None:
            name = self.name
        if len(name) > consts.object_name_limit:
            raise InvalidObjectName(name)

    def _make_headers(self):
        """
        Returns a dictionary representing http headers based on the
        respective instance attributes.

        @raise InvalidMetaName: if a metadata key exceeds
                                consts.meta_name_limit
        @raise InvalidMetaValue: if a metadata value exceeds
                                 consts.meta_value_limit
        """
        headers = {}
        # An unset size (None) is sent as "0" rather than the string "None".
        if self.size is None:
            headers['Content-Length'] = "0"
        else:
            headers['Content-Length'] = str(self.size) or "0"
        if self._etag:
            headers['ETag'] = self._etag

        if self.content_type:
            headers['Content-Type'] = self.content_type
        else:
            headers['Content-Type'] = 'application/octet-stream'
        for key in self.metadata:
            if len(key) > consts.meta_name_limit:
                raise InvalidMetaName(key)
            if len(self.metadata[key]) > consts.meta_value_limit:
                raise InvalidMetaValue(self.metadata[key])
            headers['X-Object-Meta-' + key] = self.metadata[key]
        return headers

    @classmethod
    def compute_md5sum(cls, fobj):
        """
        Given an open file object, returns the md5 hexdigest of the data.

        The file is read from its current position in 4096-byte blocks and
        is rewound to offset 0 afterwards.
        """
        checksum = md5()
        buff = fobj.read(4096)
        while buff:
            checksum.update(buff)
            buff = fobj.read(4096)
        fobj.seek(0)
        return checksum.hexdigest()

    def public_uri(self):
        """
        Retrieve the URI for this object, if its container is public.

        >>> container1 = connection['container1']
        >>> container1.make_public()
        >>> container1.create_object('file.txt').write('testing')
        >>> container1['file.txt'].public_uri()
        'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

        @return: the public URI for this object
        @rtype: str
        """
        return "%s/%s" % (self.container.public_uri().rstrip('/'),
                quote(self.name))

    def public_ssl_uri(self):
        """
        Retrieve the SSL URI for this object, if its container is public.

        >>> container1 = connection['container1']
        >>> container1.make_public()
        >>> container1.create_object('file.txt').write('testing')
        >>> container1['file.txt'].public_ssl_uri()
        'https://c61.ssl.cf0.rackcdn.com/file.txt'

        @return: the public SSL URI for this object
        @rtype: str
        """
        return "%s/%s" % (self.container.public_ssl_uri().rstrip('/'),
                quote(self.name))

    def purge_from_cdn(self, email=None):
        """
        Purge Edge cache for this object.
        You will be notified by email if one is provided when the
        job completes.

        >>> obj.purge_from_cdn("user@domain.com")

        or

        >>> obj.purge_from_cdn("user@domain.com,user2@domain.com")

        or

        >>> obj.purge_from_cdn()

        @param email: A Valid email address
        @type email: str
        @raise CDNNotEnabled: if the connection has no CDN enabled
        @raise ResponseError: if the server answers with a non-2xx status
        """
        if not self.container.conn.cdn_enabled:
            # CDNNotEnabled is not among the module-level imports, so the
            # bare name raised NameError here.
            # NOTE(review): assumed to be defined in the errors module next
            # to the other exceptions this file imports -- confirm.
            from errors import CDNNotEnabled
            raise CDNNotEnabled()

        if email:
            hdrs = {"X-Purge-Email": email}
            response = self.container.conn.cdn_request('DELETE',
                    [self.container.name, self.name], hdrs=hdrs)
        else:
            response = self.container.conn.cdn_request('DELETE',
                    [self.container.name, self.name])

        # Same 2xx window as every other status check in this class.
        if (response.status < 200) or (response.status > 299):
            raise ResponseError(response.status, response.reason)
655
class ObjectResults(object):
    """
    An iterable results set object for Objects.

    This class implements dictionary- and list-like interfaces.
    """
    def __init__(self, container, objects=None):
        """
        @param container: container the listed objects belong to; passed on
                          to every L{Object} built from this result set
        @type container: L{Container}
        @param objects: listing records (dicts); records without a 'name'
                        key are dropped
        @type objects: list of dict
        """
        if objects is None:
            objects = []
        self._names = []
        self._objects = []
        for obj in objects:
            try:
                self._names.append(obj['name'])
            except KeyError:
                # pseudo-objects from a delimiter query don't have names
                continue
            else:
                self._objects.append(obj)
        self.container = container

    def __getitem__(self, key):
        """
        Return the L{Object} at index key, or a list of Objects when key is
        a slice.
        """
        # Slices arrive here as slice objects whenever __getslice__ is not
        # consulted (extended slices, interpreters without __getslice__).
        if isinstance(key, slice):
            return [Object(self.container, object_record=k)
                        for k in self._objects[key]]
        return Object(self.container, object_record=self._objects[key])

    def __getslice__(self, i, j):
        """
        Return a list of L{Object}s for the records in the slice i:j.
        """
        return [Object(self.container, object_record=k)
                    for k in self._objects[i:j]]

    def __contains__(self, item):
        """
        Return True if item is the name of a listed object, or one of the
        raw listing records this result set was built from.
        """
        # index() and count() work on names, so membership accepts names
        # too; raw records are still accepted as before.
        return item in self._objects or item in self._names

    def __len__(self):
        """
        Return the number of named objects in the result set.
        """
        return len(self._objects)

    def __repr__(self):
        return 'ObjectResults: %s objects' % len(self._objects)
    __str__ = __repr__

    def index(self, value, *args):
        """
        returns an integer for the first index of value
        """
        return self._names.index(value, *args)

    def count(self, value):
        """
        returns the number of occurrences of value
        """
        return self._names.count(value)
706 707 # vim:set ai sw=4 ts=4 tw=0 expandtab: 708