Coverage for src/artemis_sg/gcloud.py: 21%
65 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-12 06:30 -0700
« prev ^ index » next coverage.py v7.3.1, created at 2023-10-12 06:30 -0700
1#!/usr/bin/env python
3import datetime
4import logging
5import os
6import time
8import puremagic
9from google.cloud import storage
11from artemis_sg.config import CFG
# Bare name of this module: the file name with its directory and extension
# stripped. Used as a prefix in log messages.
MODULE = os.path.splitext(os.path.split(__file__)[1])[0]
class GCloud:
    """Thin wrapper around a single Google Cloud Storage bucket.

    ``upload_cloud_blob``, ``generate_cloud_signed_url`` and ``list_blobs``
    each increment ``cloud_api_call_count`` by one per invocation.
    """

    def __init__(self, cloud_key_file, bucket_name="default"):
        """Create a storage client and bind it to *bucket_name*.

        Args:
            cloud_key_file: Path to the credentials key file. It is exported
                through the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
                variable before the client is created.
            bucket_name: Name of the bucket all methods operate on.
        """
        self.cloud_api_call_count = 0
        # This environ setting needs to stay.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(self, source_file_path, destination_blob_name):
        """Upload a local file to the bucket under *destination_blob_name*.

        Returns:
            Whatever ``blob.upload_from_filename`` returns.
        """
        blob = self.bucket.blob(destination_blob_name)
        ret_val = blob.upload_from_filename(source_file_path)
        self.cloud_api_call_count += 1
        return ret_val

    def generate_cloud_signed_url(self, blob_name):
        """Generates a v4 signed URL for downloading a blob.

        The URL allows ``GET`` requests and expires after 30 minutes.

        Note that this method requires a service account key file. You can not use
        this if you are using Application Default Credentials from Google Compute
        Engine or from the Google Cloud SDK.
        """
        blob = self.bucket.blob(blob_name)
        url = blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )
        self.cloud_api_call_count += 1
        return url

    def list_blobs(self, prefix):
        """Return an iterable of the bucket's blobs whose names start with *prefix*."""
        # NOTE: no explicit page_token handling is needed here; the iterator
        # returned by the client library fetches further pages as it is
        # consumed. The counter below therefore counts one logical listing,
        # not the number of pages fetched.
        blobs = self.storage_client.list_blobs(self.bucket_name, prefix=prefix)
        self.cloud_api_call_count += 1
        return blobs

    def list_image_blobs(self, prefix):
        """Return the names of blobs under *prefix* that have an image content type.

        A blob counts as an image when its ``content_type`` contains the
        substring ``"image"``. Blobs without a content type are skipped.

        Returns:
            list: Blob names, in the order the listing yields them.
        """
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            # vvvvvvv FIXME: (#163) BUG: hard-coded assumption based on CFG prefix
            # content_type can be None; treat that as "not an image" instead
            # of raising TypeError on the substring test.
            if "image" in (blob.content_type or "")
        ]
def upload(file_source_dir, bucket_prefix, cloud_object, fresh_seconds=60 * 60):
    """Upload the image files in *file_source_dir* to the cloud bucket.

    Only regular files directly inside *file_source_dir* are considered.
    A file whose detected kind is not ``.jpg`` or ``.png`` (or whose kind
    cannot be detected) is logged as an error and deleted from disk.
    An image that already exists in the bucket is uploaded again only if the
    local file was modified within the last *fresh_seconds* seconds.

    Args:
        file_source_dir: Directory containing the candidate files.
        bucket_prefix: Prefix for blob names; each blob is named
            ``"{bucket_prefix}/{filename}"``.
        cloud_object: Object providing ``list_image_blobs(prefix)`` and
            ``upload_cloud_blob(path, blob_name)``.
        fresh_seconds: Maximum age, in seconds, at which a local file is
            still considered new enough to overwrite an existing blob.
            Defaults to one hour.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    # vvv TODO: (#163) CFG: freshness for image files
    # Build the set once so each membership test inside the loop is O(1).
    existing_blobs = set(cloud_object.list_image_blobs(bucket_prefix))
    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            continue
        file_blob_name = f"{bucket_prefix}/{filename}"
        # verify the file is an image, otherwise delete it
        try:
            kind = puremagic.from_file(filepath)
        except puremagic.main.PureError:
            kind = None
        if kind not in (".jpg", ".png"):
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue
        # don't upload existing blobs unless the file is new
        file_age = time.time() - os.path.getmtime(filepath)
        if file_blob_name in existing_blobs and file_age > fresh_seconds:
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud "
                f"bucket, not uploading."
            )
            continue
        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)
def main():
    """Upload the configured source directory to the configured bucket.

    The source directory is read from
    ``CFG["asg"]["data"]["dir"]["upload_source"]``; the bucket name, blob
    prefix and key file are read from ``CFG["google"]["cloud"]``.
    """
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    cloud_cfg = CFG["google"]["cloud"]
    # Read every setting before building the client so a missing key fails
    # before any client is created.
    bucket = cloud_cfg["bucket"]
    prefix = cloud_cfg["bucket_prefix"]
    key_file = cloud_cfg["key_file"]

    gcloud = GCloud(cloud_key_file=key_file, bucket_name=bucket)
    upload(source_dir, prefix, gcloud)


if __name__ == "__main__":
    main()