Coverage for src/artemis_sg/gcloud.py: 21%

65 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-05 09:33 -0700

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4import datetime 

5import logging 

6import os 

7import time 

8 

9import puremagic 

10from google.cloud import storage 

11 

12from artemis_sg.config import CFG 

13 

# Bare module name (file name without directory or extension); used as a
# prefix for log messages emitted by this module.
MODULE = os.path.basename(os.path.splitext(__file__)[0])

15 

16 

class GCloud:
    """Small wrapper around a single Google Cloud Storage bucket.

    Every operation issued through this object increments
    ``cloud_api_call_count`` so callers can see how many were made.
    """

    def __init__(self, cloud_key_file, bucket_name="default"):
        """Create a storage client and bind it to *bucket_name*.

        Args:
            cloud_key_file: Value exported as ``GOOGLE_APPLICATION_CREDENTIALS``
                before the storage client is created.
            bucket_name: Name of the bucket all operations act on.
        """
        self.cloud_api_call_count = 0
        # This environ setting needs to stay.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(self, source_file_path, destination_blob_name):
        """Upload a local file to the bucket under *destination_blob_name*.

        Returns:
            Whatever the underlying ``upload_from_filename`` call returns.
        """
        blob = self.bucket.blob(destination_blob_name)
        ret_val = blob.upload_from_filename(source_file_path)
        self.cloud_api_call_count += 1
        return ret_val

    def generate_cloud_signed_url(self, blob_name):
        """Generates a v4 signed URL for downloading a blob.

        The URL allows ``GET`` requests and expires after 30 minutes.

        Note that this method requires a service account key file. You can not
        use this if you are using Application Default Credentials from Google
        Compute Engine or from the Google Cloud SDK.
        """
        blob = self.bucket.blob(blob_name)
        url = blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )
        self.cloud_api_call_count += 1
        return url

    def list_blobs(self, prefix):
        """Return the client's listing of blobs in the bucket under *prefix*.

        The result is passed through unchanged from the storage client.
        """
        # TODO: explicit page_token handling is not implemented; the iterator
        # returned by the client presumably fetches further pages on demand
        # (confirm against the google-cloud-storage documentation).
        blobs = self.storage_client.list_blobs(self.bucket_name, prefix=prefix)
        self.cloud_api_call_count += 1
        return blobs

    def list_image_blobs(self, prefix):
        """Return the names of blobs under *prefix* whose content type is an image.

        Blobs without a content type are skipped instead of raising
        ``TypeError`` on the substring check.

        Returns:
            A list of blob names, in the order the listing yields them.
        """
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            if blob.content_type and "image" in blob.content_type
        ]

65 

66 

def upload(file_source_dir, bucket_prefix, cloud_object):
    """Upload the image files found in *file_source_dir* to the cloud bucket.

    Only regular files directly inside *file_source_dir* are considered.
    Files that are not identified as ``.jpg`` or ``.png`` are DELETED from
    disk. A file whose blob already exists is skipped unless it was modified
    within the last hour.

    Args:
        file_source_dir: Directory to scan for files to upload.
        bucket_prefix: Prefix joined with ``/`` and the file name to form the
            blob name; also used to list the blobs that already exist.
        cloud_object: Object providing ``list_image_blobs(prefix)`` and
            ``upload_cloud_blob(path, blob_name)``.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    hour = 1 * 60 * 60  # seconds
    image_kinds = (".jpg", ".png")
    # Build a set once so the per-file membership test is O(1).
    existing_blobs = set(cloud_object.list_image_blobs(bucket_prefix))

    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            continue
        file_blob_name = f"{bucket_prefix}/{filename}"

        # verify the file is an image, otherwise delete it
        try:
            kind = puremagic.from_file(filepath)
        except (puremagic.main.PureError, ValueError):
            # PureError: the content could not be identified.
            # ValueError: puremagic rejects empty input; treat an empty file
            # like any other non-image instead of aborting the whole run.
            kind = None
        if kind not in image_kinds:
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue

        # don't upload existing blobs unless the file is new
        file_age = time.time() - os.path.getmtime(filepath)
        if file_blob_name in existing_blobs and file_age > hour:
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud bucket, "
                "not uploading."
            )
            continue

        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)

98 

99 

def main():
    """Read the upload settings from ``CFG`` and run the upload.

    The source directory comes from ``CFG["asg"]["data"]["dir"]`` and the
    bucket name, bucket prefix and key file from ``CFG["google"]["cloud"]``.
    """
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    cloud_cfg = CFG["google"]["cloud"]
    # Read every setting up front so a missing key fails before any client
    # is created.
    bucket, prefix, key_file = (
        cloud_cfg[key] for key in ("bucket", "bucket_prefix", "key_file")
    )

    gcloud = GCloud(cloud_key_file=key_file, bucket_name=bucket)
    upload(source_dir, prefix, gcloud)

108 

109 

# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()