Coverage for src/driada/information/entropy.py: 100.00%
42 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-25 15:40 +0300
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-25 15:40 +0300
1"""
2Entropy calculation functions for discrete, continuous, and mixed variable types.
4This module provides various entropy calculation methods including:
5- Discrete entropy
6- Joint entropy for discrete and mixed variables
7- Conditional entropy for different variable type combinations
8"""
10import numpy as np
11import scipy.stats
12from .gcmi import ent_g
def entropy_d(x):
    """Calculate entropy for a discrete variable.

    Parameters
    ----------
    x : array-like
        Discrete variable values. Multi-dimensional input is treated as a
        flat collection of samples.

    Returns
    -------
    float
        Entropy in bits.
    """
    _, counts = np.unique(x, return_counts=True)
    # np.unique counts over the flattened input, so normalise by the number
    # of values actually counted; len(x) would only be the size of the first
    # axis and would yield probabilities that do not sum to 1 for N-D input.
    probabilities = counts / counts.sum()
    return probs_to_entropy(probabilities)
def probs_to_entropy(p):
    """Calculate entropy for a discrete probability distribution.

    Parameters
    ----------
    p : array-like
        Probability distribution (must sum to 1). Zero entries are allowed
        and contribute nothing to the result.

    Returns
    -------
    float
        Entropy in bits.
    """
    # Accept plain sequences as well as arrays; the arithmetic below needs
    # elementwise semantics.
    p = np.asarray(p, dtype=float)
    # Apply the convention 0 * log2(0) = 0 by dropping zero-probability
    # entries. Offsetting every probability by an epsilon instead would bias
    # each term and make a degenerate distribution come out slightly negative.
    nonzero = p[p > 0]
    return -np.sum(nonzero * np.log2(nonzero))
def joint_entropy_dd(x, y):
    """Calculate joint entropy for two discrete variables.

    Parameters
    ----------
    x : array-like
        First discrete variable.
    y : array-like
        Second discrete variable. Must contain the same number of samples
        as ``x``.

    Returns
    -------
    float
        Joint entropy H(X,Y) in bits.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not contain the same number of samples.
    """
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    if x.shape != y.shape:
        raise ValueError("x and y must contain the same number of samples")

    # Replace each value by the index of its category. Counting exact
    # categories (instead of equal-width histogram bins) keeps distinct
    # labels apart even when they are unevenly spaced or non-numeric.
    _, x_codes = np.unique(x, return_inverse=True)
    y_levels, y_codes = np.unique(y, return_inverse=True)

    # Encode every (x, y) pair as a single integer so the pairs can be
    # counted with one np.unique call.
    pair_codes = x_codes * y_levels.size + y_codes
    _, pair_counts = np.unique(pair_codes, return_counts=True)

    # Pairs that never occur carry zero probability and add nothing to the
    # entropy, so only observed pairs are passed on.
    joint_prob = pair_counts / pair_counts.sum()
    return probs_to_entropy(joint_prob)
def conditional_entropy_cdd(z, x, y, k=5):
    """Estimate H(Z|X,Y) for a continuous Z conditioned on discrete X and Y.

    The samples of ``z`` are split into groups, one per combination of a
    unique ``x`` value and a unique ``y`` value. For each group the entropy
    reported by ``ent_g`` is weighted by the fraction of all samples that
    fall into the group, and the weighted terms are summed.

    Parameters
    ----------
    z : array-like
        Continuous variable. Must support boolean-mask indexing and
        ``reshape`` (e.g. a 1-D numpy array).
    x : array-like
        First discrete variable; compared elementwise against each of its
        unique values.
    y : array-like
        Second discrete variable; compared elementwise against each of its
        unique values.
    k : int, optional
        Minimum group size threshold: only groups with more than ``k``
        samples contribute to the result. Default: 5.

    Returns
    -------
    float
        Conditional entropy H(Z|X,Y). The integer ``0`` is returned when no
        group has more than ``k`` samples.
    """
    total = 0
    for x_level in np.unique(x):
        x_mask = x == x_level
        for y_level in np.unique(y):
            # Samples of z observed together with this (x, y) combination.
            group = z[x_mask & (y == y_level)]
            if len(group) <= k:
                # Too few points for a meaningful estimate; the group is
                # skipped and adds nothing to the total.
                continue
            weight = len(group) / len(z)
            total += ent_g(group.reshape(1, -1)) * weight

    return total
def conditional_entropy_cd(z, x, k=5):
    """Estimate H(Z|X) for a continuous Z conditioned on a discrete X.

    The samples of ``z`` are split into one group per unique value of ``x``.
    For each group the entropy reported by ``ent_g`` is weighted by the
    fraction of all samples that fall into the group, and the weighted
    terms are summed.

    Parameters
    ----------
    z : array-like
        Continuous variable. Must support boolean-mask indexing and
        ``reshape`` (e.g. a 1-D numpy array).
    x : array-like
        Discrete variable; compared elementwise against each of its unique
        values.
    k : int, optional
        Minimum group size threshold: only groups with more than ``k``
        samples contribute to the result. Default: 5.

    Returns
    -------
    float
        Conditional entropy H(Z|X). The integer ``0`` is returned when no
        group has more than ``k`` samples.
    """
    total = 0
    for level in np.unique(x):
        # Samples of z observed together with this value of x.
        group = z[x == level]
        if len(group) <= k:
            # Too few points for a meaningful estimate; the group is
            # skipped and adds nothing to the total.
            continue
        weight = len(group) / len(z)
        total += ent_g(group.reshape(1, -1)) * weight

    return total
def joint_entropy_cdd(x, y, z, k=5):
    """Calculate joint entropy H(X,Y,Z) for discrete X, Y and continuous Z.

    Applies the chain rule H(X,Y,Z) = H(X,Y) + H(Z|X,Y): the discrete part
    comes from ``joint_entropy_dd`` and the conditional part from
    ``conditional_entropy_cdd``.

    Parameters
    ----------
    x : array-like
        First discrete variable.
    y : array-like
        Second discrete variable.
    z : array-like
        Continuous variable.
    k : int, optional
        Forwarded unchanged to ``conditional_entropy_cdd``. Default: 5.

    Returns
    -------
    float
        Joint entropy H(X,Y,Z).
    """
    discrete_part = joint_entropy_dd(x, y)
    conditional_part = conditional_entropy_cdd(z, x, y, k=k)
    return discrete_part + conditional_part
def joint_entropy_cd(x, z, k=5):
    """Calculate joint entropy H(X,Z) for discrete X and continuous Z.

    Applies the chain rule H(X,Z) = H(X) + H(Z|X): the discrete part comes
    from ``entropy_d`` and the conditional part from
    ``conditional_entropy_cd``.

    Parameters
    ----------
    x : array-like
        Discrete variable.
    z : array-like
        Continuous variable.
    k : int, optional
        Forwarded unchanged to ``conditional_entropy_cd``. Default: 5.

    Returns
    -------
    float
        Joint entropy H(X,Z).
    """
    discrete_part = entropy_d(x)
    conditional_part = conditional_entropy_cd(z, x, k=k)
    return discrete_part + conditional_part