QuadratiK Usage Examples#
[1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
import pandas as pd
Normality Test#
This section contains example for the Parametric and Non-parametric Normality Test based on kernel-based quadratic distances
Parametric#
[2]:
from QuadratiK.kernel_test import KernelTest
data = np.random.randn(100, 2)
normality_test = KernelTest(h=0.4, centering_type="param", random_state=42).test(data)
print("Test : {}".format(normality_test.test_type_))
print("Execution time: {:.3f}".format(normality_test.execution_time))
print("Un H0 is Rejected : {}".format(normality_test.un_h0_rejected_))
print("Un Test Statistic : {}".format(normality_test.un_test_statistic_))
print("Un Critical Value (CV) : {}".format(normality_test.vn_cv_))
print("Vn H0 is Rejected : {}".format(normality_test.vn_h0_rejected_))
print("Vn Test Statistic : {}".format(normality_test.vn_test_statistic_))
print("Vn Critical Value (CV) : {}".format(normality_test.vn_cv_))
print("Selected tuning parameter : {}".format(normality_test.h))
Test : Kernel-based quadratic distance Normality test
Execution time: 2.427
Un H0 is Rejected : False
Un Test Statistic : -116.93474728173035
Un Critical Value (CV) : 8.808524062653914
Vn H0 is Rejected : False
Vn Test Statistic : 0.6315175347522548
Vn Critical Value (CV) : 8.808524062653914
Selected tuning parameter : 0.4
[3]:
print(normality_test.summary())
Time taken for execution: 2.427 seconds
Test Results
----------------- ----------------------------------------------
Test Type Kernel-based quadratic distance Normality test
Un Test Statistic -116.93474728173035
Un Critical Value 185.334479118873
Un Reject H0 False
Vn Test Statistic 0.6315175347522548
Vn Critical Value 8.808524062653914
Vn Reject H0 False
Var Un 5.992919179649088e-06
----------------- ----------------------------------------------
Summary Statistics
Feature 0 Feature 1
------- ----------- -----------
Mean -0.1156 0.034
Std Dev 0.8563 0.9989
Median -0.0353 0.1323
IQR 1.0704 1.3333
Min -2.6197 -1.9876
Max 1.8862 2.7202
QQ Plot#
[4]:
from QuadratiK.tools import qq_plot
qq_plot(data)
[4]:

Two Sample Test#
This sections shows example for the two-sample test using normal kernel-based quadratic distance
[5]:
from QuadratiK.kernel_test import KernelTest
X = np.random.randn(100, 2)
Y = np.random.randn(100, 2)
two_sample_test = KernelTest(h=0.4, random_state=42).test(X, Y)
print("Test : {}".format(two_sample_test.test_type_))
print("Execution time: {:.3f}".format(two_sample_test.execution_time))
print("H0 is Rejected : {}".format(two_sample_test.un_h0_rejected_))
print("Test Statistic : {}".format(two_sample_test.un_test_statistic_))
print("Critical Value (CV) : {}".format(two_sample_test.un_cv_))
print("CV Method : {}".format(two_sample_test.cv_method_))
print("Selected tuning parameter : {}".format(two_sample_test.h))
Test : Kernel-based quadratic distance two-sample test
Execution time: 0.181
H0 is Rejected : [False False]
Test Statistic : [0.3639171 0.40914903]
Critical Value (CV) : [1.16208903 1.30725714]
CV Method : subsampling
Selected tuning parameter : 0.4
[6]:
print(two_sample_test.summary())
Time taken for execution: 0.181 seconds
Test Results
----------------- -----------------------------------------------
Test Type Kernel-based quadratic distance two-sample test
Un Test Statistic [0.3639171 0.40914903]
Un Critical Value [1.16208903 1.30725714]
Reject H0 [False False]
Var Un [6.42729627e-05 1.28399708e-05]
----------------- -----------------------------------------------
Summary Statistics
Group 1 Group 2 Overall
------------------------ --------- --------- ---------
('Feature 0', 'Mean') 0.1282 -0.045 0.0416
('Feature 0', 'Std Dev') 1.0396 1.025 1.0334
('Feature 0', 'Median') 0.1056 0.0118 0.0737
('Feature 0', 'IQR') 1.4912 1.2409 1.345
('Feature 0', 'Min') -3.2413 -2.4716 -3.2413
('Feature 0', 'Max') 2.3147 3.0789 3.0789
('Feature 1', 'Mean') 0.0435 -0.1263 -0.0414
('Feature 1', 'Std Dev') 0.9348 0.9656 0.9517
('Feature 1', 'Median') 0.0114 -0.1967 -0.1224
('Feature 1', 'IQR') 1.2379 1.4056 1.3208
('Feature 1', 'Min') -1.9521 -2.3019 -2.3019
('Feature 1', 'Max') 3.8527 2.2707 3.8527
K-Sample Test#
Shows examples for the kernel-based quadratic distance k-sample tests with the Normal kernel and bandwidth parameter h.
[7]:
from QuadratiK.kernel_test import KernelTest
X = np.random.randn(500, 4)
y = np.repeat(np.array([1, 2, 3, 4, 5]), 100)
k_sample_test = KernelTest(h=1.5, method="subsampling").test(X, y)
print("Test : {}".format(k_sample_test.test_type_))
print("Execution time: {:.3f} seconds".format(k_sample_test.execution_time))
print("H0 is Rejected : {}".format(k_sample_test.un_h0_rejected_))
print("Test Statistic : {}".format(k_sample_test.un_test_statistic_))
print("Critical Value (CV) : {}".format(k_sample_test.un_cv_))
print("CV Method : {}".format(k_sample_test.cv_method_))
print("Selected tuning parameter : {}".format(k_sample_test.h))
Test : Kernel-based quadratic distance K-sample test
Execution time: 4.209 seconds
H0 is Rejected : [False False]
Test Statistic : [-1.19987341 -1.22848521]
Critical Value (CV) : [1.80290935 1.84631206]
CV Method : subsampling
Selected tuning parameter : 1.5
[8]:
print(k_sample_test.summary())
Time taken for execution: 4.209 seconds
Test Results
----------------- ---------------------------------------------
Test Type Kernel-based quadratic distance K-sample test
Un Test Statistic [-1.19987341 -1.22848521]
Un Critical Value [1.80290935 1.84631206]
Reject H0 [False False]
Var Un [2.06157521e-08 7.89821748e-10]
----------------- ---------------------------------------------
Summary Statistics
Group 1 Group 2 Group 3 Group 4 Group 5 Overall
------------------------ --------- --------- --------- --------- --------- ---------
('Feature 0', 'Mean') -0.016 0.1352 0.0391 -0.0453 0.001 0.0228
('Feature 0', 'Std Dev') 0.9033 1.0094 0.8065 1.0001 1.0049 0.9465
('Feature 0', 'Median') -0.0216 0.0706 0.0854 -0.0767 0.0043 0.0217
('Feature 0', 'IQR') 1.1691 1.5099 1.0209 1.1796 1.35 1.2662
('Feature 0', 'Min') -2.651 -2.4994 -2.1727 -3.0195 -2.6357 -3.0195
('Feature 0', 'Max') 2.5269 2.4034 2.0615 2.4126 2.4267 2.5269
('Feature 1', 'Mean') 0.0767 0.0006 0.2154 0.1073 0.1408 0.1081
('Feature 1', 'Std Dev') 1.0609 1.1006 0.9916 0.9058 0.9807 1.0086
('Feature 1', 'Median') 0.1344 0.0281 0.1926 0.1695 0.095 0.1397
('Feature 1', 'IQR') 1.6374 1.2897 1.5382 1.0391 1.4977 1.4323
('Feature 1', 'Min') -2.4239 -2.8963 -1.9664 -2.0039 -2.2193 -2.8963
('Feature 1', 'Max') 2.1633 2.5797 2.6017 3.1377 3.1099 3.1377
('Feature 2', 'Mean') 0.0664 0.109 0.0648 -0.1372 0.0261 0.0258
('Feature 2', 'Std Dev') 0.9982 0.8847 1.0373 1.0405 1.0345 1.0005
('Feature 2', 'Median') 0.001 0.0707 0.0553 -0.1513 -0.0034 0.0041
('Feature 2', 'IQR') 1.1909 1.1662 1.2926 1.4705 1.3745 1.3092
('Feature 2', 'Min') -2.6969 -2.5539 -2.7032 -2.5623 -2.6042 -2.7032
('Feature 2', 'Max') 2.5734 2.1573 2.1709 2.6443 2.9491 2.9491
('Feature 3', 'Mean') 0.1473 0.0332 0.0736 -0.019 -0.0514 0.0367
('Feature 3', 'Std Dev') 0.9948 1.0394 1.1152 0.9658 0.9239 1.0083
('Feature 3', 'Median') 0.1617 0.1228 0.009 -0.0896 -0.047 -0.0062
('Feature 3', 'IQR') 1.2778 1.3332 1.4463 1.1245 1.2915 1.3144
('Feature 3', 'Min') -2.0734 -2.591 -2.9214 -2.9404 -2.2469 -2.9404
('Feature 3', 'Max') 2.6324 2.493 3.1931 2.7597 2.3639 3.1931
Poisson Kernel Test#
Shows example for perforing the the kernel-based quadratic distance Goodness-of-fit tests for Uniformity for spherical data using the Poisson kernel with concentration parameter rho.
[9]:
from QuadratiK.tools import sample_hypersphere
from QuadratiK.poisson_kernel_test import PoissonKernelTest
X = sample_hypersphere(100, 3, random_state=42)
unif_test = PoissonKernelTest(rho=0.7, random_state=42).test(X)
print("Execution time: {:.3f} seconds".format(unif_test.execution_time))
print("U Statistic Results")
print("H0 is rejected : {}".format(unif_test.u_statistic_h0_))
print("Un Statistic : {}".format(unif_test.u_statistic_un_))
print("Critical Value : {}".format(unif_test.u_statistic_cv_))
print("V Statistic Results")
print("H0 is rejected : {}".format(unif_test.v_statistic_h0_))
print("Vn Statistic : {}".format(unif_test.v_statistic_vn_))
print("Critical Value : {}".format(unif_test.v_statistic_cv_))
Execution time: 0.058 seconds
U Statistic Results
H0 is rejected : False
Un Statistic : 1.6156682048968174
Critical Value : 1.9917172670030654
V Statistic Results
H0 is rejected : False
Vn Statistic : 22.83255917641962
Critical Value : 23.229486935225513
[10]:
print(unif_test.summary())
Time taken for execution: 0.058 seconds
Test Results
-------------------------- ------------------
Test Type Poisson Kernel-based quadratic
distance test of Uniformity on the Sphere
U Statistic Un 1.6156682048968174
U Statistic Critical Value 1.9917172670030654
U Statistic Reject H0 False
V Statistic Vn 22.83255917641962
V Statistic Critical Value 23.229486935225513
V Statistic Reject H0 False
-------------------------- ------------------
Summary Statistics
Feature 0 Feature 1 Feature 2
------- ----------- ----------- -----------
Mean 0.0451 -0.1206 0.0309
Std Dev 0.509 0.5988 0.6122
Median 0.132 -0.1596 0.0879
IQR 0.8051 1.0063 1.1473
Min -0.9548 -0.9929 -0.9904
Max 0.9772 0.9738 0.9996
QQ Plot#
[11]:
from QuadratiK.tools import qq_plot
qq_plot(X, dist="uniform")
[11]:

Poisson Kernel based Clustering#
Shows example for performing the Poisson kernel-based clustering algorithm on the Sphere based on the Poisson kernel-based densities.
[12]:
from QuadratiK.datasets import load_wireless_data
from QuadratiK.spherical_clustering import PKBC
from sklearn.preprocessing import LabelEncoder
X, y = load_wireless_data(return_X_y=True)
le = LabelEncoder()
le.fit(y)
y = le.transform(y)
cluster_fit = PKBC(num_clust=4, random_state=42).fit(X)
ari, macro_precision, macro_recall, avg_silhouette_Score = cluster_fit.validation(y)
print("Estimated mixing proportions :", cluster_fit.alpha_)
print("Estimated concentration parameters: ", cluster_fit.rho_)
print("Adjusted Rand Index:", ari)
print("Macro Precision:", macro_precision)
print("Macro Recall:", macro_recall)
print("Average Silhouette Score:", avg_silhouette_Score)
Estimated mixing proportions : [0.23590339 0.24977919 0.25777522 0.25654219]
Estimated concentration parameters: [0.97773265 0.98348976 0.98226901 0.98572597]
Adjusted Rand Index: 0.9403086353805835
Macro Precision: 0.9771870612442508
Macro Recall: 0.9769999999999999
Average Silhouette Score: 0.3803089203572107
Elbow Plot using Euclidean Distance and Cosine Similarity based WCSS#
[13]:
wcss_euc = []
wcss_cos = []
for i in range(2, 10):
clus_fit = PKBC(num_clust=i).fit(X)
wcss_euc.append(clus_fit.euclidean_wcss_)
wcss_cos.append(clus_fit.cosine_wcss_)
fig = plt.figure(figsize=(6, 4))
plt.plot(list(range(2, 10)), wcss_euc, "--o")
plt.xlabel("Number of Cluster")
plt.ylabel("Within Cluster Sum of Squares (WCSS)")
plt.title("Elbow Plot for Wireless Indoor Localization dataset")
plt.show()
fig = plt.figure(figsize=(6, 4))
plt.plot(list(range(2, 10)), wcss_cos, "--o")
plt.xlabel("Number of Cluster")
plt.ylabel("Within Cluster Sum of Squares (WCSS)")
plt.title("Elbow Plot for Wireless Indoor Localization dataset")
plt.show()


Density Estimation and Sample Generation from PKBD#
[14]:
from QuadratiK.spherical_clustering import PKBD
pkbd_data = PKBD().rpkb(10, [0.5, 0], 0.5, "rejvmf", random_state=42)
dens_val = PKBD().dpkb(pkbd_data, [0.5, 0.5], 0.5)
print(dens_val)
[0.46827108 0.05479605 0.21163936 0.06195099 0.39567698 0.40473724
0.26561508 0.36791766 0.09324676 0.46847274]
Tuning Parameter \(h\) selection#
Computes the kernel bandwidth of the Gaussian kernel for the two-sample and ksample kernel-based quadratic distance (KBQD) tests.
[15]:
from QuadratiK.kernel_test import select_h
X = np.random.randn(200, 2)
y = np.random.randint(0, 2, 200)
h_selected, all_values, power_plot = select_h(
X, y, alternative="location", power_plot=True, random_state=None
)
print("Selected h is: ", h_selected)
Selected h is: 0.8
[16]:
# shows the detailed power vs h table
all_values
[16]:
h | delta | power | |
---|---|---|---|
0 | 0.4 | 0.2 | 0.08 |
1 | 0.8 | 0.2 | 0.22 |
2 | 1.2 | 0.2 | 0.26 |
3 | 1.6 | 0.2 | 0.22 |
4 | 2.0 | 0.2 | 0.28 |
5 | 2.4 | 0.2 | 0.38 |
6 | 2.8 | 0.2 | 0.40 |
7 | 3.2 | 0.2 | 0.38 |
0 | 0.4 | 0.3 | 0.26 |
1 | 0.8 | 0.3 | 0.56 |
2 | 1.2 | 0.3 | 0.56 |
3 | 1.6 | 0.3 | 0.62 |
4 | 2.0 | 0.3 | 0.64 |
5 | 2.4 | 0.3 | 0.70 |
6 | 2.8 | 0.3 | 0.78 |
7 | 3.2 | 0.3 | 0.66 |
[17]:
# shows the power plot
power_plot
[17]:
