Source code for openS3.utils

from base64 import b64encode
from contextlib import closing
from datetime import datetime
import hashlib
import hmac
from http.client import HTTPConnection
import os
import re
import urllib.parse
from wsgiref.handlers import format_date_time

from .constants import MEDIA_TYPES, ENCODING, AWS_DATETIME_FORMAT


def b64_string(byte_string):
    """
    Return the given byte string base64-encoded and decoded to an
    ENCODING string.
    """
    return b64encode(byte_string).decode(ENCODING)
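# A minimal sketch of what ``b64_string`` returns, assuming ENCODING in
# openS3.constants is 'utf-8':
#
#     >>> b64_string(b'hello')
#     'aGVsbG8='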
def get_valid_filename(string_to_clean):
    """
    Return the given string converted to a string that can be used for a
    clean filename. Specifically, leading and trailing spaces are removed;
    other spaces are converted to underscores; and anything that is not a
    unicode alphanumeric, dash, underscore, or dot is removed.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    string_to_clean = string_to_clean.strip().replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', string_to_clean)
def validate_values(validation_func, dic):
    """
    Validate each value in ``dic`` by passing it through ``validation_func``.

    Raise a ``ValueError`` if ``validation_func`` does not return ``True``.
    """
    for value_name, value in dic.items():
        if not validation_func(value):
            raise ValueError('{} can not be {}'.format(value_name, value))
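# A minimal usage sketch for ``validate_values``; the dictionary values
# below are hypothetical:
#
#     >>> validate_values(lambda value: value is not None,
#     ...                 {'bucket': 'my_bucket', 'access_key': None})
#     Traceback (most recent call last):
#         ...
#     ValueError: access_key can not be None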
def strpawstime(timestamp):
    """
    Return the datetime parsed from an AWS header timestamp string.

    AWS datetime format: Wed, 28 Oct 2009 22:32:00 GMT
    """
    return datetime.strptime(timestamp, AWS_DATETIME_FORMAT)
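# Example, assuming AWS_DATETIME_FORMAT in openS3.constants is
# '%a, %d %b %Y %H:%M:%S GMT':
#
#     >>> strpawstime('Wed, 28 Oct 2009 22:32:00 GMT')
#     datetime.datetime(2009, 10, 28, 22, 32)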
class S3IOError(IOError):
    """
    Generic exception class for S3 communication errors.
    """
class S3FileDoesNotExistError(S3IOError):
    """
    Raised when an operation references an S3 object that does not exist.
    """
    def __init__(self, name=None, msg=None):
        total_msg = 'File does not exist: {}'.format(name)
        if msg:
            total_msg += ' {}'.format(msg)
        super().__init__(total_msg)
class OpenS3(object):
    """
    A context manager for interfacing with S3.
    """
    def __init__(self, bucket, access_key, secret_key):
        """
        Create a new context manager for interfacing with S3.

        :param bucket: An S3 bucket name.
        :param access_key: An AWS access key (e.g. AEIFKEKWEFJFWA).
        :param secret_key: An AWS secret key.
        """
        self.bucket = bucket
        self.access_key = access_key
        self.secret_key = secret_key
        validate_values(validation_func=lambda value: value is not None, dic=locals())
        self.netloc = '{}.s3.amazonaws.com'.format(bucket)
        self.mode = None
        self.acl = 'public-read'
        # File-like attributes
        self.object_key = ''
        self.buffer = ''
        self._mimetype = None
        self.headers = {}

    def __call__(self, object_key, mode='r', mimetype=None, acl='public-read'):
        """
        Configure this :py:class:`OpenS3` object to operate on a specific
        file in S3.

        :param object_key: Key of the S3 object to read or write.
        :param mode: ``'r'`` to read the object, ``'w'`` to write it.
        :param mimetype: Explicit mimetype for the object; if ``None``,
            a guess is made from the object key's file extension.
        :param acl: Canned ACL to apply to the object.
        """
        self.mode = mode
        self.object_key = object_key
        self.mimetype = mimetype
        self.acl = acl
        return self

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.mode == 'w' and self.buffer:
            # TODO Does the old file need to be deleted
            # TODO from S3 before we write over it?
            self._put()
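    # A minimal usage sketch for ``OpenS3``; the bucket name, credentials,
    # and object key below are placeholders:
    #
    #     openS3 = OpenS3('my_bucket', access_key='XXXX', secret_key='YYYY')
    #     with openS3('/greeting.txt', mode='w') as fd:
    #         fd.write('Hello World!')
    #     # The buffered content is PUT to S3 when the ``with`` block exits.
    #
    #     with openS3('/greeting.txt') as fd:
    #         print(fd.read())  # GETs the object and returns its bytes.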
    def read(self):
        """
        Return a bytes object with the contents of the remote S3 object.

        :rtype: bytes
        """
        self._get()
        return self.buffer
    def write(self, content):
        """
        Write content to the file in S3.

        :param content: Content to write to the S3 object.
        """
        if self.mode != 'w':
            raise RuntimeError('Must open file in write mode to write to file.')
        # TODO handle multiple writes to same file.
        self.buffer = content
    @property
    def mimetype(self):
        """
        Return mimetype of file. If file does not have a mimetype,
        make a guess.
        """
        if self._mimetype:
            return self._mimetype
        mimetype = 'binary/octet-stream'
        # Get the file extension.
        if self.object_key:
            _, extension = os.path.splitext(self.object_key)
            extension = extension.strip('.')
            if extension in MEDIA_TYPES:
                # Make an educated guess about what the Content-Type should be.
                mimetype = MEDIA_TYPES[extension]
        return mimetype

    @mimetype.setter
    def mimetype(self, mimetype):
        self._mimetype = mimetype
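    # Sketch of the mimetype guess, assuming MEDIA_TYPES in openS3.constants
    # maps the 'txt' extension to 'text/plain':
    #
    #     >>> OpenS3('my_bucket', 'XXXX', 'YYYY')('/notes.txt').mimetype
    #     'text/plain'
    #     >>> OpenS3('my_bucket', 'XXXX', 'YYYY')('/blob').mimetype
    #     'binary/octet-stream'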
    @property
    def size(self):
        """
        Return the size of the buffer, in bytes.
        """
        # TODO Is this the right way to get the size of the buffer in bytes?
        return len(self.buffer)
    @property
    def url(self):
        """Return URL of resource."""
        scheme = 'http'
        path = self.object_key
        query = ''
        fragment = ''
        url_tuple = (scheme, self.netloc, path, query, fragment)
        return urllib.parse.urlunsplit(url_tuple)
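    # Example of the URL construction, with placeholder credentials:
    #
    #     >>> OpenS3('my_bucket', 'XXXX', 'YYYY')('/photo.jpg').url
    #     'http://my_bucket.s3.amazonaws.com/photo.jpg'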
    @property
    def md5hash(self):
        """Return the base64-encoded MD5 digest of the file content."""
        content = self.buffer
        if isinstance(content, str):
            # The buffer holds a str after a write but bytes after a read.
            content = content.encode(ENCODING)
        digest = hashlib.md5(content).digest()
        return b64_string(digest)
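    # This value doubles as the Content-MD5 request header (per RFC 1864,
    # the base64 of the 128-bit MD5 digest). A standalone sketch of the
    # same computation, assuming ENCODING is 'utf-8':
    #
    #     >>> import base64, hashlib
    #     >>> base64.b64encode(hashlib.md5('Hello World!'.encode('utf-8')).digest()).decode('utf-8')
    #     '7Qdih1MuhjZehB6Sv8UNjA=='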
    def _head(self):
        """HEAD the remote S3 object and return the response."""
        request_headers = self._build_request_headers('HEAD', self.object_key)
        with closing(HTTPConnection(self.netloc)) as conn:
            conn.request('HEAD', self.url, headers=request_headers)
            response = conn.getresponse()
        return response

    def _get(self):
        """
        GET contents of remote S3 object.
        """
        request_headers = self._build_request_headers('GET', self.object_key)
        with closing(HTTPConnection(self.netloc)) as conn:
            conn.request('GET', self.object_key, headers=request_headers)
            response = conn.getresponse()
            if response.status not in (200,):
                if response.length is None:
                    # length == None seems to be returned from GET requests
                    # to non-existing files.
                    raise S3FileDoesNotExistError(self.object_key)
                # Catch all other error cases.
                raise S3IOError(
                    'openS3 GET error. '
                    'Response status: {}. '
                    'Reason: {}. '
                    'Response Text: \n'
                    '{}'.format(response.status, response.reason, response.read()))
            self.buffer = response.read()
            self.headers = response.headers

    def _put(self):
        """PUT contents of file to remote S3 object."""
        request_headers = self._build_request_headers('PUT', self.object_key)
        with closing(HTTPConnection(self.netloc)) as conn:
            conn.request('PUT', self.object_key, self.buffer, headers=request_headers)
            response = conn.getresponse()
            if response.status not in (200,):
                raise S3IOError(
                    'openS3 PUT error. '
                    'Response status: {}. '
                    'Reason: {}. '
                    'Response Text: \n'
                    '{}'.format(response.status, response.reason, response.read()))
    def delete(self):
        """
        Remove file from its S3 bucket.
        """
        headers = self._build_request_headers('DELETE', self.object_key)
        with closing(HTTPConnection(self.netloc)) as conn:
            conn.request('DELETE', self.object_key, headers=headers)
            response = conn.getresponse()
            if response.status not in (204,):
                raise S3IOError(
                    'openS3 DELETE error. '
                    'Response status: {}. '
                    'Reason: {}. '
                    'Response Text: \n'
                    '{}'.format(response.status, response.reason, response.read()))
    def exists(self):
        """
        Return ``True`` if file exists in S3 bucket.
        """
        response = self._head()
        if response.status in (200, 404):
            return response.status == 200
        else:
            raise S3IOError(
                'openS3 HEAD error. '
                'Response status: {}. '
                'Reason: {}. '
                'Response Text: \n'
                '{}'.format(response.status, response.reason, response.read()))
    def _request_signature(self, string_to_sign):
        """
        Construct a request signature by computing an RFC 2104 HMAC-SHA1
        of ``string_to_sign`` with the secret key and converting the
        digest to a base64-encoded ENCODING string.
        """
        digest = hmac.new(
            self.secret_key.encode(ENCODING),
            string_to_sign.encode(ENCODING),
            hashlib.sha1
        ).digest()
        return b64_string(digest)

    def _build_request_headers(self, method, object_key):
        """
        Build the request headers, including the AWS ``Authorization``
        header, for an HTTP request of type ``method`` on ``object_key``.
        """
        headers = dict()
        headers['Date'] = format_date_time(datetime.now().timestamp())
        headers['Content-Length'] = self.size
        headers['Content-MD5'] = self.md5hash
        headers['Content-Type'] = self.mimetype
        headers['x-amz-acl'] = self.acl
        if self.access_key and self.secret_key:
            string_to_sign_list = [
                method,
                headers['Content-MD5'],
                headers['Content-Type'],
                headers['Date'],
                'x-amz-acl:{}'.format(headers['x-amz-acl']),
                '/' + self.bucket + object_key
            ]
            signature = self._request_signature('\n'.join(string_to_sign_list))
            headers['Authorization'] = 'AWS {}:{}'.format(self.access_key, signature)
        return headers
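    # For reference, the string to sign assembled above follows the AWS
    # Signature Version 2 layout for this header set. A hypothetical GET of
    # '/greeting.txt' (empty buffer) in bucket 'my_bucket', assuming
    # MEDIA_TYPES maps 'txt' to 'text/plain', would sign something like:
    #
    #     GET
    #     1B2M2Y8AsgTpgAmY7PhCfg==
    #     text/plain
    #     Tue, 27 Mar 2007 19:36:42 GMT
    #     x-amz-acl:public-read
    #     /my_bucket/greeting.txt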