Coverage for src/artemis_sg/gcloud.py: 25%

61 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2024-03-06 10:59 -0800

1#!/usr/bin/env python 

2"""artemis_sg.gcloud 

3 

4Interface for Google Cloud blobs.""" 

5 

6import datetime 

7import logging 

8import os 

9import time 

10import typing as t 

11 

12import puremagic 

13from google.cloud import storage 

14 

15from artemis_sg.config import CFG 

16 

17MODULE = os.path.splitext(os.path.basename(__file__))[0] 

18 

19 

class GCloud:
    """
    Object that provides Google Cloud Bucket interaction.

    :param cloud_key_file:
        Path of file containing the authentication key for a Google Cloud.
    :param bucket_name:
        Name of the Google Cloud Bucket to be used by object instance.
    """

    def __init__(self, cloud_key_file: str, bucket_name: str = "default") -> None:
        # This environ setting needs to stay.
        # NOTE(review): storage.Client() below is created without explicit
        # credentials, so it presumably picks the key file up from this
        # variable -- confirm before changing.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cloud_key_file
        self.storage_client = storage.Client()
        self.bucket_name = bucket_name
        self.bucket = self.storage_client.bucket(self.bucket_name)

    def upload_cloud_blob(
        self, source_file_path: str, destination_blob_name: str
    ) -> None:
        """
        Upload local file to Google Cloud Bucket.

        :param source_file_path:
            Path of file to be uploaded to Google Cloud Bucket.
        :param destination_blob_name:
            Name of Google Cloud Bucket blob to be saved.
        """
        blob = self.bucket.blob(destination_blob_name)
        blob.upload_from_filename(source_file_path)

    def generate_cloud_signed_url(self, blob_name: str) -> str:
        """Generates a v4 signed URL for downloading a blob.

        The URL is requested for the ``GET`` method with a 30 minute
        expiration.

        Note that this method requires a service account key file. You can not use
        this if you are using Application Default Credentials from Google Compute
        Engine or from the Google Cloud SDK.

        :param blob_name:
            Name of Google Cloud Bucket blob to obtain URL for.
        :returns: URL of blob
        """
        blob = self.bucket.blob(blob_name)
        return blob.generate_signed_url(
            version="v4",
            expiration=datetime.timedelta(minutes=30),
            method="GET",
        )

    def list_blobs(self, prefix: str) -> "t.Iterator[storage.Blob]":
        """
        Get Iterator of blobs filtered by prefix.

        :param prefix:
            Name of Google Cloud Bucket prefix used to filter blobs
        :returns: Iterator of matching Blob objects
        """
        # FIXME: use page_token
        return self.storage_client.list_blobs(self.bucket_name, prefix=prefix)

    def list_image_blob_names(self, prefix: str) -> t.List[str]:
        """
        Get list of image blob names filtered by prefix.

        A blob counts as an image when the text ``"image"`` occurs in its
        ``content_type``.  Blobs without a content type are skipped.

        :param prefix:
            Name of Google Cloud Bucket prefix used to filter blobs
        :returns: List of matching Blob names
        """
        return [
            blob.name
            for blob in self.list_blobs(prefix)
            # content_type can be None; guard so the `in` test cannot raise
            # TypeError and abort the whole listing.
            if blob.content_type and "image" in blob.content_type
        ]

103 

104 

def upload(file_source_dir: str, bucket_prefix: str, cloud_object: GCloud) -> None:
    """
    Upload files in source directory to Google Cloud Bucket.

    Only regular files directly inside ``file_source_dir`` are considered.
    A file that is not detected as ``.jpg`` or ``.png`` (including one that
    cannot be identified at all) is logged and deleted from disk.  A file
    whose blob name is already present in the bucket is skipped unless its
    modification age is within the configured ``new_threshold_secs``.

    :param file_source_dir:
        Path to directory containing source files to upload.
    :param bucket_prefix:
        Name of Google Cloud Bucket prefix used determine storage location.
    :param cloud_object:
        Instance of artemis_sg.GCloud to handle API interactions.
    """
    namespace = f"{MODULE}.{upload.__name__}"
    # Build the set once so the membership test in the loop is O(1).
    existing_blob_names = set(cloud_object.list_image_blob_names(bucket_prefix))

    for filename in os.listdir(file_source_dir):
        filepath = os.path.join(file_source_dir, filename)
        if not os.path.isfile(filepath):
            continue

        file_blob_name = f"{bucket_prefix}/{filename}"

        # verify the file is an image, otherwise delete it
        try:
            kind = puremagic.from_file(filepath)
        except (puremagic.main.PureError, ValueError):
            # PureError: no signature matched.  ValueError: puremagic is
            # understood to raise it for empty input (e.g. a zero-byte
            # file); treat both the same way instead of aborting the run.
            kind = None
        if kind not in (".jpg", ".png"):
            logging.error(
                f"{namespace}: Err reading '{filename}', deleting '{filepath}'"
            )
            os.remove(filepath)
            continue

        # don't upload existing blobs unless the file is new
        file_age = time.time() - os.path.getmtime(filepath)
        if (
            file_blob_name in existing_blob_names
            and file_age > CFG["google"]["cloud"]["new_threshold_secs"]
        ):
            logging.info(
                f"{namespace}: File '{filename}' found in Google Cloud "
                f"bucket, not uploading."
            )
            continue

        logging.info(
            f"{namespace}: Uploading '{file_blob_name}' to Google Cloud bucket."
        )
        cloud_object.upload_cloud_blob(filepath, file_blob_name)

148 

149 

def main() -> None:
    """
    Wrapper for uploading files to Google Cloud Bucket.

    Reads the upload source directory, bucket name, bucket prefix and key
    file path from ``CFG``, builds a ``GCloud`` instance from them and hands
    everything to ``upload``.
    """
    source_dir = CFG["asg"]["data"]["dir"]["upload_source"]

    cloud_cfg = CFG["google"]["cloud"]
    bucket = cloud_cfg["bucket"]
    prefix = cloud_cfg["bucket_prefix"]
    key_file = cloud_cfg["key_file"]

    gcloud = GCloud(cloud_key_file=key_file, bucket_name=bucket)
    upload(source_dir, prefix, gcloud)

162 

163 

164if __name__ == "__main__": 

165 main()