Coverage for src/artemis_sg/gcloud.py: 21%

65 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-10-12 06:30 -0700

1#!/usr/bin/env python 

2 

3import datetime 

4import logging 

5import os 

6import time 

7 

8import puremagic 

9from google.cloud import storage 

10 

11from artemis_sg.config import CFG 

12 

# Base name of this file without its extension; used to prefix log messages.
MODULE, _ = os.path.splitext(os.path.basename(__file__))

14 

15 

class GCloud:
    """Small wrapper around a single Google Cloud Storage bucket.

    Every method that talks to the storage client or the bucket increments
    ``cloud_api_call_count`` so callers can see how many calls were issued
    through this object.
    """

    def __init__(self, cloud_key_file, bucket_name="default"):
        """Create a storage client and bind it to ``bucket_name``.

        Args:
            cloud_key_file: Value exported as the
                ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable before
                the client is created (presumably the path to a service
                account key file).
            bucket_name: Name of the bucket the other methods operate on.
        """
        self.cloud_api_call_count = 0
        # This environ setting needs to stay.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(self, source_file_path, destination_blob_name):
        """Upload a local file to the bucket under ``destination_blob_name``.

        Args:
            source_file_path: Path of the local file to upload.
            destination_blob_name: Name of the blob to create in the bucket.

        Returns:
            Whatever ``blob.upload_from_filename`` returns.
        """
        blob = self.bucket.blob(destination_blob_name)
        ret_val = blob.upload_from_filename(source_file_path)
        self.cloud_api_call_count += 1
        return ret_val

    def generate_cloud_signed_url(self, blob_name):
        """Generates a v4 signed URL for downloading a blob.

        Note that this method requires a service account key file. You can not use
        this if you are using Application Default Credentials from Google Compute
        Engine or from the Google Cloud SDK.

        Args:
            blob_name: Name of the blob inside the bucket.

        Returns:
            A signed ``GET`` URL that expires after 30 minutes.
        """
        blob = self.bucket.blob(blob_name)

        url = blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )
        self.cloud_api_call_count += 1

        return url

    def list_blobs(self, prefix):
        """Return the client's listing of blobs in the bucket under ``prefix``.

        The result of ``storage_client.list_blobs`` is returned untouched, and
        the call is counted once.
        """
        # FIXME: use page_token
        # page_token = None
        blobs = self.storage_client.list_blobs(self.bucket_name, prefix=prefix)
        self.cloud_api_call_count += 1
        return blobs

    def list_image_blobs(self, prefix):
        """Return the names of blobs under ``prefix`` whose type mentions "image".

        Blobs without a content type (``None`` or empty) are skipped instead of
        raising ``TypeError`` on the substring test.

        Args:
            prefix: Blob name prefix forwarded to ``list_blobs``.

        Returns:
            A list of blob names, in listing order.
        """
        # vvvvvvv FIXME: (#163) BUG: hard-coded assumption based on CFG prefix
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            if blob.content_type and "image" in blob.content_type
        ]

65 

66 

def upload(file_source_dir, bucket_prefix, cloud_object, freshness_seconds=1 * 60 * 60):
    """Upload the image files in ``file_source_dir`` to the cloud bucket.

    Each regular file in the directory is identified with ``puremagic``.
    Files that are not recognised as ``.jpg`` or ``.png`` are logged and
    DELETED from disk. A recognised file is skipped when a blob of the same
    name already exists and the local file is older than
    ``freshness_seconds``; otherwise it is uploaded.

    Args:
        file_source_dir: Directory to scan (not recursive).
        bucket_prefix: Prefix used both to list existing blobs and to build
            destination names as ``"{bucket_prefix}/{filename}"``.
        cloud_object: Object providing ``list_image_blobs(prefix)`` and
            ``upload_cloud_blob(path, blob_name)``.
        freshness_seconds: Age, in seconds of modification time, above which
            an already-uploaded file is not uploaded again. Defaults to one
            hour.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    # vvv TODO: (#163) CFG: freshness for image files
    # Build the set once so each membership test in the loop is O(1).
    existing_blobs = set(cloud_object.list_image_blobs(bucket_prefix))
    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            continue
        file_blob_name = f"{bucket_prefix}/{filename}"
        # verify the file is an image, otherwise delete it
        try:
            kind = puremagic.from_file(filepath)
        except (puremagic.main.PureError, ValueError):
            # ValueError: puremagic can reject empty input with it; treat that
            # like any other unidentifiable file rather than aborting the run.
            kind = None
        if kind not in (".jpg", ".png"):
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue
        # don't upload existing blobs unless the file is new
        file_age = time.time() - os.path.getmtime(filepath)
        if file_blob_name in existing_blobs and file_age > freshness_seconds:
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud "
                f"bucket, not uploading."
            )
            continue
        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)

100 

101 

def main():
    """Read the upload settings from ``CFG`` and run ``upload`` with them."""
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]
    cloud_cfg = CFG["google"]["cloud"]
    # Resolve every setting before constructing the client, so a missing key
    # fails before any cloud object is created.
    bucket = cloud_cfg["bucket"]
    prefix = cloud_cfg["bucket_prefix"]
    key_file = cloud_cfg["key_file"]

    upload(
        source_dir,
        prefix,
        GCloud(cloud_key_file=key_file, bucket_name=bucket),
    )

110 

111 

112if __name__ == "__main__": 

113 main()