Coverage for common/parallel.py: 50%

14 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/common/parallel.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Assistance functions for "embarrassingly parallel" job assignment.** 

27 

28""" 

29 

30import logging 

31from typing import Any 

32 

33from cardinal_pythonlib.hash import hash64 

34 

35log = logging.getLogger(__name__) 

36 

37 

def is_my_job_by_int(value: int, tasknum: int, ntasks: int) -> bool:
    """
    Decide whether the current task is responsible for a piece of work that
    is identified by an integer.

    Args:
        value: integer used to allocate the work; it should be fairly evenly
            distributed so that the load is shared evenly between tasks
        tasknum: the number of the task asking the question
        ntasks: the total number of tasks

    Returns:
        ``True`` if this task should do the work, ``False`` otherwise

    Algorithm:

    - with a single task, the answer is always yes (``tasknum`` is not
      examined);
    - otherwise, the work belongs to the task whose number equals
      ``value % ntasks``.

    """
    # Short-circuit: the modulo is only evaluated when there are several
    # tasks to choose between.
    return ntasks == 1 or value % ntasks == tasknum

60 

61 

def is_my_job_by_hash(value: Any, tasknum: int, ntasks: int) -> bool:
    """
    Decide whether the current task is responsible for a piece of work that
    is identified by an arbitrary hashable value.

    Args:
        value: anything that can be hashed
        tasknum: the number of the task asking the question
        ntasks: the total number of tasks

    Returns:
        ``True`` if this task should do the work, ``False`` otherwise

    Algorithm:

    - With a single task, the answer is always yes and nothing is hashed.

    - Otherwise, :func:`hash64` turns the value into a deterministic but
      roughly randomly distributed integer, and the work belongs to the task
      whose number equals that integer modulo ``ntasks``. The hash is a
      signed integer, which is fine, since ``%`` copes with that.

    Usage notes:

    - Intended for parallelizing over non-integer PKs.

    - Less efficient than splitting the work via SQL, because something has
      to be fetched and hashed first.

    - Inside loops, apply this test as early as possible, for speed.
    """
    if ntasks != 1:
        # Several tasks: allocate by the hash of the value.
        return hash64(value) % ntasks == tasknum
    # Only one task, so it does everything.
    return True

92 

93 

def is_my_job_by_hash_prehashed(
    hashed_value: int, tasknum: int, ntasks: int
) -> bool:
    """
    Variant of :func:`is_my_job_by_hash` for callers that have already
    hashed the value and can guarantee that ``ntasks`` is >1. There is no
    special case for a single task here.

    Args:
        hashed_value: the integer hash of the value
        tasknum: the number of the task asking the question
        ntasks: the total number of tasks

    Returns:
        ``True`` if this task should do the work, ``False`` otherwise

    """
    # The remainder of the hash picks the task that owns the work.
    owning_task = hashed_value % ntasks
    return owning_task == tasknum