Coverage for common/parallel.py: 50%
14 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/common/parallel.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Assistance functions for "embarrassingly parallel" job assignment.**
28"""
30import logging
31from typing import Any
33from cardinal_pythonlib.hash import hash64
35log = logging.getLogger(__name__)
def is_my_job_by_int(value: int, tasknum: int, ntasks: int) -> bool:
    """
    "Is it my job to do this work?"

    Decides whether the task numbered ``tasknum`` (out of ``ntasks`` tasks)
    should handle the item identified by ``value``.

    Args:
        value: some integer value that is fairly evenly distributed, to spread
            the workload
        tasknum: which task number am I?
        ntasks: how many tasks are there in total?

    Returns:
        is it my job?

    Algorithm:

    - if there's only one task: yes (``tasknum`` is not examined)
    - otherwise, return ``value % ntasks == tasknum``

    """
    # With a single task there is nothing to share out, so skip the modulo.
    if ntasks == 1:
        return True
    # For positive ``ntasks``, Python's ``%`` yields a result in
    # ``0 .. ntasks - 1`` even when ``value`` is negative.
    return value % ntasks == tasknum
def is_my_job_by_hash(value: Any, tasknum: int, ntasks: int) -> bool:
    """
    "Is it my job to do this work?"

    Decides whether the task numbered ``tasknum`` (out of ``ntasks`` tasks)
    should handle the item identified by the non-integer ``value``.

    Args:
        value: anything that's hashable
        tasknum: which task number am I?
        ntasks: how many tasks are there in total?

    Returns:
        is it my job?

    Algorithm:

    - If there's only one task: yes, and ``value`` is not hashed at all.

    - Otherwise, we convert some non-integer thing into a deterministic but
      roughly randomly distributed integer using :func:`hash64`. That
      produces a signed integer, which is OK because ``%`` works
      nonetheless.

    When we use it:

    - We use this function to parallelize for non-integer PKs.

    - This is less efficient than dividing the work up via SQL, because we
      have to fetch/hash something.

    - Perform this test ASAP in loops, for speed.
    """
    # Single-task case: avoid the cost of hashing entirely.
    if ntasks == 1:
        return True
    return hash64(value) % ntasks == tasknum
def is_my_job_by_hash_prehashed(
    hashed_value: int, tasknum: int, ntasks: int
) -> bool:
    """
    A version of :func:`is_my_job_by_hash` for use when you have pre-hashed
    the value, and ``ntasks`` is guaranteed to be >1.

    Unlike :func:`is_my_job_by_hash`, there is no single-task shortcut and
    no hashing here: the caller supplies the integer hash and this function
    only performs the modulo comparison.

    Args:
        hashed_value: integer hashed value
        tasknum: which task number am I?
        ntasks: how many tasks are there in total?

    Returns:
        is it my job?

    """
    return hashed_value % ntasks == tasknum