Coverage for src/artemis_sg/gcloud.py: 21%
65 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 09:33 -0700
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4import datetime
5import logging
6import os
7import time
9import puremagic
10from google.cloud import storage
12from artemis_sg.config import CFG
# Name of this module (file name without directory or extension); used to
# prefix log messages emitted from this file.
MODULE = os.path.basename(os.path.splitext(__file__)[0])
class GCloud:
    """Small wrapper around a single Google Cloud Storage bucket.

    Every method that talks to the storage client increments
    ``cloud_api_call_count`` so callers can see how many calls were issued
    through this object.
    """

    def __init__(self, cloud_key_file, bucket_name="default"):
        """Create a storage client and bind it to *bucket_name*.

        Args:
            cloud_key_file: value exported as the
                ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
                before the storage client is created.
            bucket_name: name of the bucket all operations act on.
        """
        self.cloud_api_call_count = 0
        # This environ setting needs to stay.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(self, source_file_path, destination_blob_name):
        """Upload a local file to the bucket under *destination_blob_name*.

        Args:
            source_file_path: path of the local file to upload.
            destination_blob_name: name the blob gets inside the bucket.

        Returns:
            Whatever ``blob.upload_from_filename`` returns.
        """
        blob = self.bucket.blob(destination_blob_name)
        ret_val = blob.upload_from_filename(source_file_path)
        self.cloud_api_call_count += 1
        return ret_val

    def generate_cloud_signed_url(self, blob_name):
        """Generates a v4 signed URL for downloading a blob.

        The URL is requested for the ``GET`` method with an expiration of
        30 minutes.

        Note that this method requires a service account key file. You can not
        use this if you are using Application Default Credentials from Google
        Compute Engine or from the Google Cloud SDK.

        Args:
            blob_name: name of the blob inside the bucket.

        Returns:
            The signed URL produced by ``blob.generate_signed_url``.
        """
        blob = self.bucket.blob(blob_name)
        url = blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )
        self.cloud_api_call_count += 1
        return url

    def list_blobs(self, prefix):
        """Return the client's listing of bucket blobs matching *prefix*.

        Args:
            prefix: blob name prefix passed through to the storage client.

        Returns:
            The iterable returned by ``storage_client.list_blobs``.
        """
        # FIXME: use page_token
        # NOTE(review): the client listing looks like a lazily paged
        # iterator -- confirm whether explicit page_token handling is needed.
        blobs = self.storage_client.list_blobs(self.bucket_name, prefix=prefix)
        self.cloud_api_call_count += 1
        return blobs

    def list_image_blobs(self, prefix):
        """Return the names of blobs under *prefix* whose type is an image.

        A blob counts as an image when its ``content_type`` contains the
        substring ``"image"``.  Blobs without a content type (``None`` or
        empty) are skipped instead of raising ``TypeError``.

        Args:
            prefix: blob name prefix to list.

        Returns:
            A list of blob names, in listing order.
        """
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            if blob.content_type and "image" in blob.content_type
        ]
def upload(file_source_dir, bucket_prefix, cloud_object):
    """Upload image files from *file_source_dir* to the cloud bucket.

    Each regular file directly inside *file_source_dir* is identified with
    ``puremagic``.  A file that is not identified as ``.jpg`` or ``.png``
    (including files puremagic cannot read) is logged as an error and
    deleted from disk.  An identified image is uploaded as
    ``{bucket_prefix}/{filename}`` unless a blob with that name is already
    listed in the bucket and the local file was last modified more than one
    hour ago.

    Args:
        file_source_dir: directory whose direct entries are considered.
        bucket_prefix: prefix used to list existing image blobs and to
            build the destination blob names.
        cloud_object: object providing ``list_image_blobs(prefix)`` and
            ``upload_cloud_blob(path, blob_name)``.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    hour = 1 * 60 * 60  # seconds
    # Collect the existing names once into a set so the per-file membership
    # test below is O(1) instead of scanning a list for every local file.
    existing_blobs = frozenset(cloud_object.list_image_blobs(bucket_prefix))

    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            continue

        file_blob_name = f"{bucket_prefix}/{filename}"

        # verify the file is an image, otherwise delete it
        try:
            kind = puremagic.from_file(filepath)
        except puremagic.main.PureError:
            kind = None
        if kind not in (".jpg", ".png"):
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue

        # don't upload existing blobs unless the file is new
        file_age = time.time() - os.path.getmtime(filepath)
        if file_blob_name in existing_blobs and file_age > hour:
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud bucket, not uploading."
            )
            continue

        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)
def main():
    """Read upload settings from ``CFG`` and run the upload.

    Uses the configured upload source directory together with the Google
    Cloud bucket name, bucket prefix and key file to build a ``GCloud``
    object, then hands everything to ``upload``.
    """
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    cloud_cfg = CFG["google"]["cloud"]
    bucket = cloud_cfg["bucket"]
    prefix = cloud_cfg["bucket_prefix"]
    key_file = cloud_cfg["key_file"]

    upload(
        source_dir,
        prefix,
        GCloud(cloud_key_file=key_file, bucket_name=bucket),
    )
# Script entry point: run the upload only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()