Coverage for src/edwh_restic_plugin/repositories/__init__.py: 51%
199 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-10 20:54 +0100
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-10 20:54 +0100
1import abc
2import datetime
3import heapq
4import importlib
5import importlib.util
6import io
7import os
8import re
9import sys
10import typing
11from collections import OrderedDict, defaultdict
12from pathlib import Path
14import invoke
15from invoke import Context
16from print_color import print # fixme: replace with termcolor
17from tqdm import tqdm
18from typing_extensions import NotRequired
20from ..env import DOTENV, check_env, read_dotenv
21from ..helpers import camel_to_snake, fix_tags
# Folder that Repository.get_scripts() globs for `<verb>_<target>*` script files.
# It is a relative path, so it is resolved against the current working directory.
DEFAULT_BACKUP_FOLDER = Path("captain-hooks")
class Repository(abc.ABC):
    """Abstract base class for a restic repository backend.

    Subclasses supply ``uri``, ``setup`` and ``prepare_for_restic``; this class builds the
    shared ``configure`` / ``backup`` / ``restore`` / ``check`` / ``snapshot`` commands on
    top of those.
    """

    # _targets: a list of file and directory paths that should be included in the backup.
    _targets = [".env", "./backup"]
    # _excluded: a list of file and directory paths that should be excluded from the backup.
    _excluded = [
        ".git",
        ".idea",
        "backups",
        "*.pyc",
        "*.bak",
        "../",
        "./..",
        "errors",
        "sessions",
        "__pycache__",
    ]

    _env_path: Path
    env_config: dict[str, str]

    def __init__(self, env_path: Path = DOTENV) -> None:
        """Read ``env_path`` with ``read_dotenv`` and merge the result into ``os.environ``.

        Args:
            - env_path (Path): dotenv file to read; also used as default path for ``check_env``.
        """
        super().__init__()
        print("repo init", self)
        self._env_path = env_path
        self.env_config = env = read_dotenv(env_path)
        os.environ |= env
        self._restichostname = env.get("RESTICHOSTNAME")  # or None if it is not there

    @property
    def uri(self):
        """Return the prefix required for restic to indicate the protocol, for example sftp:hostname:"""
        raise NotImplementedError("Prefix unknown in base class")

    @abc.abstractmethod
    def setup(self):
        """Ensure that the settings are in the .env file"""
        raise NotImplementedError("Setup undefined")

    @abc.abstractmethod
    def prepare_for_restic(self, c):
        """Prepare the process environment so that restic can reach this repository.

        Called with the invoke context before every restic command issued by this class.
        Implementations should probably start with::

            env = read_dotenv(DOTENV)
            os.environ.update(env)
        """
        raise NotImplementedError("Prepare for restic undefined")

    def check_env(
        self,
        key: str,
        default: str | None,
        comment: str,
        prefix: str | None = None,
        suffix: str | None = None,
        postfix: str | None = None,
        path: Path | None = None,
    ):
        """Delegate to the module-level ``check_env``, defaulting ``path`` to this repo's env file."""
        return check_env(
            key=key,
            default=default,
            comment=comment,
            prefix=prefix,
            suffix=suffix,
            postfix=postfix,
            path=path or self._env_path,
        )

    def configure(self, c):
        """Configure the backup environment variables and initialise the restic repository."""
        self.prepare_for_restic(c)
        print("configure")
        # First, make sure restic is up-to-date (warn=True: a failed update does not abort)
        c.run("sudo restic self-update", hide=True, warn=True)
        # This is the command used to configure the environment variables properly.
        c.run(f"restic init --repository-version 2 -r {self.uri}")

    @property
    def hostarg(self):
        """Return the host argument for restic command (empty when RESTICHOSTNAME is not set)."""
        return f" --host {self._restichostname} " if self._restichostname else ""

    @property
    def targets(self):
        """Return the target files and directories for the backup, separated by spaces."""
        return " ".join(self._targets)

    @property
    def excluded(self):
        """Return the excluded files and directories for the backup, joined by ' --exclude '.

        NOTE(review): the first pattern carries no leading flag, so the caller presumably
        writes ``--exclude`` in front of this value itself - confirm against the scripts.
        """
        return " --exclude ".join(self._excluded)

    @staticmethod
    def get_snapshot_from(stdout: str) -> str | None:
        """
        Parses the stdout from a Restic command to extract the snapshot ID.

        Args:
            - stdout (str): The stdout output from a Restic command.

        Returns:
            - The last snapshot ID found as a string, or None when the output contains
              no "snapshot <id> saved" line.
        """
        snapshots_ids = re.findall(r"snapshot (.*?) saved", stdout)
        return snapshots_ids[-1] if snapshots_ids else None

    @staticmethod
    def get_scripts(target, verb):
        """Retrieves the scripts that contain a restic command and returns them to 'execute_files' to execute them.

        Args:
            - target (str): target is a string that specifies the target of the backup, can be a file, stream,
              directory, or any other object that needs to be backed up.
            - verb (str): is also a string that specifies the action to be performed on the target.
              For example, the verb could be "backup" or "restore". The verb is used in combination with the
              target to search for the backup script files that contain the restic command.

        Returns:
            - A list of script paths (as strings) found in DEFAULT_BACKUP_FOLDER.

        Exits the process with status 255 when no script matches.
        """
        # get files by verb and target. EXAMPLE backup_files_*.sh
        files = [str(file) for file in DEFAULT_BACKUP_FOLDER.glob(f"{verb}_{target}*")]
        # check if no files are found
        if not files:
            print("no files found with target:", target)
            sys.exit(255)

        return files

    def execute_files(
        self,
        c: Context,
        target: str,
        verb: str,
        verbose: bool,
        message: str | None = None,
        snapshot: str = "latest",
    ):
        """
        Executes the backup scripts retrieved by 'get_scripts' function.

        Args:
            - verbose (bool): A flag indicating whether to display verbose output.
            - target (str): The target of the backup.
            - verb (str): The verb associated with the backup.
            - message (str, optional): The message to be associated with the backup.
              If not provided, the current local time is used. Defaults to None.
            - snapshot (str, optional): The snapshot to be used for the backup. Defaults to "latest".

        Exits the process with the highest script exit code when at least one script failed.
        """
        self.prepare_for_restic(c)

        # set snapshot available in environment for sh files
        os.environ["SNAPSHOT"] = snapshot

        # Here you can make a message that you will see in the snapshots list
        if message is None:
            # If no message is provided, use the current local time as the backup message
            message = f"{datetime.datetime.now()} localtime"

        # set MSG in environment for sh files
        os.environ["MSG"] = message

        # get files by target and verb. see self.get_scripts for more info
        files = self.get_scripts(target, verb)

        snapshots_created = []
        file_codes = []
        # run all backup/restore files
        for file in tqdm(files):
            if verbose:
                print("\033[1m running", file, "\033[0m")

            # run the script with pty=True; when it exits non-zero, keep the result that is
            # attached to the exception so its stdout and exit code can still be inspected
            try:
                print(f"{file} output:")
                ran_script: invoke.runners.Result = c.run(file, hide=True, pty=True)
                file_codes.append(0)
            except invoke.exceptions.UnexpectedExit as e:
                ran_script = e.result
                file_codes.append(e.result.exited)

            if verbose:
                print(f"{file} output:")
                if ran_script.stdout:
                    print(f"stdout:{ran_script.stdout}")
                else:
                    print("no output found!")

            # may be None when the script output contains no "snapshot ... saved" line
            snapshots_created.append(self.get_snapshot_from(ran_script.stdout))

        # send message with backup. see message for more info
        # also if a tag in tags is None it will be removed by fix_tags
        if verb != "restore":
            tags = fix_tags(["message", *snapshots_created])
            c.run(
                f"restic {self.hostarg} -r {self.uri} backup --tag {','.join(tags)} --stdin --stdin-filename message",
                in_stream=io.StringIO(message),
                hide=True,
            )

        print("\n\nfile status codes:")

        for file, code in zip(files, file_codes):
            if code == 0:
                print(file, tag="success", tag_color="green")
            else:
                print("in", file, tag="failure", tag_color="red")

        # Bind the code first and compare afterwards: `x := max(...) > 0` would bind the
        # boolean and always exit with status 1 instead of the real worst exit code.
        worst_status_code = max(file_codes, default=0)
        if worst_status_code > 0:
            sys.exit(worst_status_code)

    def backup(self, c, verbose: bool, target: str, message: str):
        """
        Backs up the specified target.

        Args:
            - verbose (bool): A flag indicating whether to display verbose output.
            - target (str): The target of the backup (e.g. 'files', 'stream'; default is all types).
            - message (str): The message to be associated with the backup.
        """
        self.execute_files(c, target, "backup", verbose, message)

    def restore(self, c, verbose: bool, target: str, snapshot: str = "latest"):
        """
        Restores the specified target using the specified snapshot or the latest if None is given.

        Args:
            - verbose (bool): A flag indicating whether to display verbose output.
            - target (str): The target of the restore.
            - snapshot (str, optional): The snapshot to be used for the restore. Defaults to "latest".
        """
        self.execute_files(c, target, "restore", verbose, snapshot=snapshot)

    def check(self, c):
        """
        Checks the integrity of the backup repository.
        """
        self.prepare_for_restic(c)
        c.run(f"restic {self.hostarg} -r {self.uri} check --read-data")

    def snapshot(self, c: Context, tags: list[str] | None = None, n: int = 2, verbose: bool = False):
        """
        a list of all the backups with a message

        Args:
            - tags (list, optional): A list of tags to use for the snapshot. Defaults to None,
              which selects the "files" and "stream" tags.
            - n (int, optional): The number of latest snapshots to show. Defaults to 2.
            - verbose (bool): Show more info about what's happening?

        Returns:
            None. This function only prints the output to the console.
        """
        # choose to see only the files or the stream snapshots
        if tags is None:
            tags = ["files", "stream"]

        self.prepare_for_restic(c)
        tags = "--tag " + " --tag ".join(tags) if tags else ""
        command = f"restic {self.hostarg} -r {self.uri} snapshots --latest {n} {tags} -c"
        if verbose:
            print("$", command, file=sys.stderr)

        stdout = c.run(
            command,
            hide=True,
        ).stdout

        if verbose:
            print(stdout, file=sys.stderr)

        # every line that starts with an 8 character id describes one snapshot
        snapshot_lines = re.findall(r"^([0-9a-z]{8})\s", stdout, re.MULTILINE)
        # last word on the snapshot's line, used below to recognise "message" snapshots
        main_tag_per_snapshot = {
            snapshot_id: re.findall(rf"^{snapshot_id}.*?(\w*)$", stdout, re.MULTILINE)
            for snapshot_id in snapshot_lines
        }

        message_snapshot_per_snapshot = defaultdict(list)  # key is source, value is snapshot containing the message
        for snapshot_id, possible_tag_names in main_tag_per_snapshot.items():
            tag_name = possible_tag_names[0]
            if tag_name != "message":
                continue
            # the indented continuation line(s) below a message snapshot hold the id it belongs to
            for _, is_message_for_snapshot_id in re.findall(rf"\n{snapshot_id}.*(\n\s+(.*)\n)+", stdout):
                message_snapshot_per_snapshot[is_message_for_snapshot_id].append(snapshot_id)

        for source_id, message_snapshots in message_snapshot_per_snapshot.items():
            # print all Restic messages
            command = f"restic {self.hostarg} -r {self.uri} dump {message_snapshots[0]} --tag message message"
            if verbose:
                print("$", command, file=sys.stderr)

            restore_output = c.run(
                command,
                hide=True,
                warn=True,
            ).stdout

            if verbose:
                print(restore_output, file=sys.stderr)

            message = restore_output.strip()
            suffix = f" : [{message}]\n"
            # source_id is free text taken from the listing, so escape it for the pattern, and
            # append the message through a function so that backslashes inside the message are
            # not interpreted as replacement-template escapes.
            # The match ends in exactly one newline; swap it for the annotated line ending.
            stdout = re.sub(
                rf"\n{re.escape(source_id)}(.*)\n",
                lambda match, suffix=suffix: match.group(0)[:-1] + suffix,
                stdout,
            )

        if verbose:
            print("---\n", file=sys.stderr)

        print(stdout)

    # noop gt, lt etc methods: two instances never order before or after each other

    def __gt__(self, other):
        return False

    def __lt__(self, other):
        return False
class RepositoryRegistration(typing.TypedDict):
    """Settings stored next to a Repository class when it is pushed onto the registry."""

    # primary name; used as key in the alias table and in to_ordered_dict()
    short_name: str | None
    # extra names that are mapped to the same repository class
    aliases: NotRequired[tuple[str, ...]]
    # ordering value; push() remaps negative values to a very large number
    priority: NotRequired[int]
class RepositoryRegistrations:
    """Priority-ordered registry of Repository classes, filled through ``push``.

    Iteration, ``to_sorted_list`` and ``to_ordered_dict`` yield the classes from the lowest
    to the highest priority value; classes with an equal priority keep registration order.
    """

    def __init__(self):
        # _queue is for internal use only; it is kept sorted on priority (lowest first).
        # A sorted list also satisfies the heap invariant, so heapq functions keep working.
        # external api should use .queue !!!
        self._queue: list[tuple[int, typing.Type[Repository], RepositoryRegistration]] = []
        # aliases stores a reference for each name to the Repo class
        self._aliases: dict[str, typing.Type[Repository]] = {}

    def push(self, repo: typing.Type[Repository], settings: RepositoryRegistration):
        """Register ``repo`` under its short name and aliases, ordered by ``settings['priority']``.

        A missing or negative priority is mapped to a value above ``sys.maxsize`` so that
        such repositories end up behind every repository with an explicit priority.
        """
        priority = settings.get("priority", -1)
        if priority < 0:
            priority = sys.maxsize - priority  # very high int

        # Order on the priority only. Comparing whole (priority, repo, settings) tuples, as
        # heapq.heappush does, falls through to comparing the classes (and then the dicts)
        # whenever two priorities are equal, which raises TypeError. list.sort is stable, so
        # entries with an equal priority stay in registration order.
        self._queue.append((priority, repo, settings))
        self._queue.sort(key=lambda entry: entry[0])

        self._aliases[settings["short_name"]] = repo
        # 'aliases' is NotRequired: treat a missing (or None) value as "no aliases"
        for alias in settings.get("aliases") or ():
            self._aliases[alias] = repo

    @property
    def queue(self):
        """Return the sorted registrations, importing the sibling modules first when empty."""
        if not self._queue:
            self._find_items()

        return self._queue

    def clear(self):
        """Forget every registration and alias."""
        self._queue = []
        self._aliases = {}

    def to_sorted_list(self):
        """Return the registered classes as a list, lowest priority value first."""
        # No need for sorting here; push() keeps the queue sorted
        return list(self)

    def to_ordered_dict(self) -> OrderedDict[str, typing.Type[Repository]]:
        """Return a ``short_name -> class`` mapping in priority order."""
        ordered_dict = OrderedDict()
        for _, item, settings in self.queue:
            ordered_dict[settings["short_name"]] = item
        return ordered_dict

    def __iter__(self) -> typing.Generator[typing.Type[Repository], None, None]:
        return (item[1] for item in self.queue)

    def __bool__(self):
        return bool(self.queue)

    def _find_items(self) -> None:
        """Import every non-dunder module in this package directory."""
        # import all registrations in this folder, so @register adds them to _queue
        package_path = Path(__file__).resolve().parent

        for file_path in package_path.glob("*.py"):
            pkg = file_path.stem
            if not pkg.startswith("__"):
                importlib.import_module(f".{pkg}", package=__name__)
def register(
    short_name: str = None,
    aliases: tuple[str, ...] = (),
    priority: int = -1,
    # **settings: Unpack[RepositoryRegistration]  # <- not really supported yet!
) -> typing.Callable[[typing.Type[Repository]], typing.Type[Repository]]:
    """Build a class decorator that adds a Repository subclass to ``registrations``.

    Args:
        - short_name (str, optional): name to register under. When empty, the snake_case
          form of the class name without a trailing "_repository" is used.
        - aliases (tuple): extra names for the same class.
        - priority (int): ordering value that is handed to the registry unchanged.

    Raises:
        - SyntaxError: when used as a bare ``@register`` (the class arrives as short_name).
        - TypeError: when the decorated object is not a subclass of Repository.
    """
    # a bare `@register` passes the decorated class itself as first argument
    if isinstance(short_name, type):
        raise SyntaxError("Please call @register() with parentheses!")

    def decorator(cls: typing.Type[Repository]) -> typing.Type[Repository]:
        if not isinstance(cls, type) or not issubclass(cls, Repository):
            raise TypeError(f"Decorated class {cls} must be a subclass of Repository!")

        # only derive a name from the class when none was given explicitly
        name = short_name
        if not name:
            name = camel_to_snake(cls.__name__).removesuffix("_repository")

        registration = RepositoryRegistration(
            short_name=name,
            aliases=aliases,
            priority=priority,
        )
        registrations.push(cls, registration)
        return cls

    return decorator
# Module-level registry instance; the decorator returned by register() pushes classes onto it.
registrations = RepositoryRegistrations()