Coverage for src/driada/information/entropy.py: 100.00%

42 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-25 15:40 +0300

1""" 

Entropy calculation functions for discrete, continuous, and mixed variable types.

This module provides various entropy calculation methods including:
- Discrete entropy
- Joint entropy for discrete and mixed variables
- Conditional entropy for different variable type combinations

8""" 

9 

10import numpy as np 

11import scipy.stats 

12from .gcmi import ent_g 

13 

14 

def entropy_d(x):
    """Calculate entropy for a discrete variable.

    Parameters
    ----------
    x : array-like
        Discrete variable values. Multi-dimensional input is treated as a
        flat collection of samples, because ``np.unique`` counts values over
        the flattened array.

    Returns
    -------
    float
        Entropy in bits.
    """
    _, counts_x = np.unique(x, return_counts=True)
    # Normalize by the number of counted samples instead of len(x): the two
    # differ for multi-dimensional input, where len(x) is only the first axis
    # and the resulting "probabilities" would not sum to 1.
    p_x = counts_x / counts_x.sum()
    return probs_to_entropy(p_x)

32 

33 

def probs_to_entropy(p):
    """Calculate entropy for a discrete probability distribution.

    Parameters
    ----------
    p : array-like
        Probability distribution (must sum to 1). Zero entries are allowed
        and contribute nothing, following the convention 0 * log(0) = 0.

    Returns
    -------
    float
        Entropy in bits.
    """
    p = np.asarray(p, dtype=float)
    # Drop zero-probability entries instead of adding an epsilon inside the
    # logarithm: the epsilon biases every term and makes the entropy of a
    # deterministic distribution slightly negative.
    positive = p[p > 0]
    return -np.sum(positive * np.log2(positive))

48 

49 

def joint_entropy_dd(x, y):
    """Calculate joint entropy for two discrete variables.

    The joint distribution is estimated by counting how often each observed
    (x, y) pair of labels occurs. Labels are matched by equality, so they do
    not need to be evenly spaced or even numeric.

    Parameters
    ----------
    x : array-like
        First discrete variable.
    y : array-like
        Second discrete variable. Must contain as many samples as ``x``.

    Returns
    -------
    float
        Joint entropy H(X,Y) in bits.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` contain a different number of samples.
    """
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    if x.size != y.size:
        raise ValueError("x and y must have the same number of samples")

    # Map every label to its index among the unique labels. An equal-width
    # histogram would merge distinct labels that happen to fall into the same
    # bin (e.g. 0 and 1 when the labels are {0, 1, 10}).
    _, x_codes = np.unique(x, return_inverse=True)
    y_levels, y_codes = np.unique(y, return_inverse=True)

    # Encode each (x, y) pair as a single integer and count occurrences.
    pair_codes = x_codes * y_levels.size + y_codes
    _, pair_counts = np.unique(pair_codes, return_counts=True)

    joint_prob = pair_counts / pair_counts.sum()
    return probs_to_entropy(joint_prob)

68 

69 

def conditional_entropy_cdd(z, x, y, k=5):
    """Calculate conditional differential entropy for a continuous variable given two discrete variables.

    Computes H(Z|X,Y) where Z is continuous and X,Y are discrete. The samples
    of Z are grouped by each (x, y) label combination, the entropy of every
    group is estimated with ``ent_g`` and the estimates are summed, weighted
    by the fraction of all samples that fall into the group.

    Parameters
    ----------
    z : array-like
        Continuous variable.
    x : array-like
        First discrete variable.
    y : array-like
        Second discrete variable.
    k : int, optional
        Minimum group size threshold: only groups with more than ``k``
        samples contribute to the result. Default: 5.

    Returns
    -------
    float
        Conditional entropy H(Z|X,Y). Groups with ``k`` or fewer samples are
        skipped and the remaining weights are not renormalized, so the result
        is 0.0 when no group is large enough.
    """
    # Convert up front: boolean masking below requires arrays. With a plain
    # list, `x == ux` is a single bool instead of an element-wise mask.
    z = np.asarray(z)
    x = np.asarray(x)
    y = np.asarray(y)

    n_total = len(z)
    unique_y = np.unique(y)

    h_conditional = 0.0
    for ux in np.unique(x):
        x_mask = x == ux  # invariant for the inner loop
        for uy in unique_y:
            # Samples of z observed together with this (ux, uy) combination
            filtered_z = z[x_mask & (y == uy)]
            if len(filtered_z) > k:
                # With too few points the entropy estimate would be meaningless
                weight = len(filtered_z) / n_total
                h_conditional += ent_g(filtered_z.reshape(1, -1)) * weight

    return h_conditional

104 

105 

def conditional_entropy_cd(z, x, k=5):
    """Calculate conditional differential entropy for a continuous variable given a discrete variable.

    Computes H(Z|X) where Z is continuous and X is discrete. The samples of Z
    are grouped by the label in X, the entropy of every group is estimated
    with ``ent_g`` and the estimates are summed, weighted by the fraction of
    all samples that fall into the group.

    Parameters
    ----------
    z : array-like
        Continuous variable.
    x : array-like
        Discrete variable.
    k : int, optional
        Minimum group size threshold: only groups with more than ``k``
        samples contribute to the result. Default: 5.

    Returns
    -------
    float
        Conditional entropy H(Z|X). Groups with ``k`` or fewer samples are
        skipped and the remaining weights are not renormalized, so the result
        is 0.0 when no group is large enough.
    """
    # Convert up front: boolean masking below requires arrays. With a plain
    # list, `x == ux` is a single bool instead of an element-wise mask.
    z = np.asarray(z)
    x = np.asarray(x)

    n_total = len(z)
    h_conditional = 0.0

    for ux in np.unique(x):
        # Samples of z observed together with this value of x
        filtered_z = z[x == ux]
        if len(filtered_z) > k:
            # With too few points the entropy estimate would be meaningless
            weight = len(filtered_z) / n_total
            h_conditional += ent_g(filtered_z.reshape(1, -1)) * weight

    return h_conditional

136 

137 

def joint_entropy_cdd(x, y, z, k=5):
    """Calculate joint entropy for two discrete and one continuous variable.

    Applies the chain rule H(X,Y,Z) = H(X,Y) + H(Z|X,Y), with X and Y
    discrete and Z continuous.

    Parameters
    ----------
    x : array-like
        First discrete variable.
    y : array-like
        Second discrete variable.
    z : array-like
        Continuous variable.
    k : int, optional
        Forwarded unchanged to ``conditional_entropy_cdd``. Default: 5.

    Returns
    -------
    float
        Joint entropy H(X,Y,Z).
    """
    # Discrete part first, then the conditional part of the continuous variable.
    return joint_entropy_dd(x, y) + conditional_entropy_cdd(z, x, y, k=k)

164 

165 

def joint_entropy_cd(x, z, k=5):
    """Calculate joint entropy for one discrete and one continuous variable.

    Applies the chain rule H(X,Z) = H(X) + H(Z|X), with X discrete and Z
    continuous.

    Parameters
    ----------
    x : array-like
        Discrete variable.
    z : array-like
        Continuous variable.
    k : int, optional
        Forwarded unchanged to ``conditional_entropy_cd``. Default: 5.

    Returns
    -------
    float
        Joint entropy H(X,Z).
    """
    # Discrete part first, then the conditional part of the continuous variable.
    return entropy_d(x) + conditional_entropy_cd(z, x, k=k)