Module grscheller_courses_distributions.gaussian
Module for the Gaussian class - derived from Udacity exercise template.
Expand source code
# Copyright 2024 Geoffrey R. Scheller
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This module contains software derived from Udacity® exercises.
# Udacity® (https://www.udacity.com/)
#
"""Module for the Gaussian class - derived from Udacity exercise template."""
from __future__ import annotations
from math import exp, pi, sqrt
import matplotlib.pyplot as plt
from .distribution import Distribution
__all__ = ['Gaussian']
class Gaussian(Distribution):
    """Class for visualizing data as Gaussian distributions.

    The Gaussian, also called Normal, distribution is a continuous probability
    distribution with probability density function

    ```
    f(x) = (1/√(2πσ²)) exp(-(x-μ)²/(2σ²))
    ```

    where

    * μ = mu = mean value of the distribution
    * σ = sigma = standard deviation of the distribution

    The methods below read `self.mean`, `self.stdev` and `self.data`; these
    are presumably provided by the `Distribution` base class (confirm there).

    """
    def __init__(self, mu: float=0.0, sigma: float=1.0):
        """Create a Gaussian with mean `mu` and standard deviation `sigma`.

        Raises:
            ValueError: if `sigma` is not strictly positive

        """
        if sigma <= 0:
            msg = 'For a Gaussian distribution, sigma must be greater than 0'
            raise ValueError(msg)

        # mu and sigma are deliberately not stored as separate attributes on
        # this class (unlike p and n for the Binomial class); they are handed
        # straight to the Distribution initializer.
        super().__init__(mu, sigma)

    def plot_histogram_data(self) -> None:
        """Produce a histogram of the data using the matplotlib pyplot library."""
        _, axis = plt.subplots()
        axis.hist(self.data)
        axis.set_title('Histogram of Data')
        axis.set_xlabel('Data')
        axis.set_ylabel('Count')
        plt.show()

    def pdf(self, x: float) -> float:
        """Gaussian prob density function for this Gaussian object.

        Evaluated at `x` using this object's current `mean` and `stdev`.

        """
        c = 1.0/sqrt(2*pi)
        mu = self.mean
        sigma = self.stdev
        return (c/sigma)*exp(-0.5*((x - mu)/sigma)**2)

    def plot_histogram_pdf(self, n_spaces: int = 100) -> tuple[list[float], list[float]]:
        """Plot the normalized histogram of the data and, below it, the
        probability density function over the same range.

        If there is no data, nothing is plotted and two empty lists are
        returned.

        Args:
            n_spaces (int): number of equal-width intervals the data range is
                divided into; the pdf is evaluated at `n_spaces + 1` points.
                Must be at least 1.

        Returns:
            list: x values used for the pdf plot
            list: y values used for the pdf plot

        Raises:
            ValueError: if there is data to plot and `n_spaces` is less than 1

        """
        data = self.data

        # len() rather than truthiness so array-like data also works.
        if len(data) == 0:
            return [], []

        # n_spaces == 0 would divide by zero below and a negative value would
        # silently produce an empty plot, so reject both explicitly.
        if n_spaces < 1:
            raise ValueError('n_spaces must be a positive integer')

        min_x, max_x = min(data), max(data)
        if min_x == max_x:
            # All data points coincide: widen the range so the pdf plot
            # does not collapse to a single point.
            min_x, max_x = min_x - 0.5, max_x + 0.5

        interval = (max_x - min_x)/n_spaces
        pdf = self.pdf
        xs: list[float] = [min_x + interval*n for n in range(n_spaces + 1)]
        ys: list[float] = [pdf(x_val) for x_val in xs]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(xs, ys)
        axes[1].set_title('Normal Distribution for the\n Sample Mean and Sample Standard Deviation')
        axes[1].set_xlabel(f'sample mean = {self.mean}, sample stdev = {self.stdev}')
        axes[1].set_ylabel('Density')
        plt.show()

        return xs, ys

    def __add__(self, other: Gaussian) -> Gaussian:
        """Add together two Gaussian distributions.

        The result has the sum of the two means and a standard deviation of
        sqrt(stdev₁² + stdev₂²), i.e. the variances add.

        Raises:
            TypeError: if `other` is not a Gaussian (subclasses are accepted)

        """
        if not isinstance(other, Gaussian):
            msg = f'A gaussian distribution cannot be added to a {type(other)}'
            raise TypeError(msg)
        return Gaussian(self.mean + other.mean, sqrt(self.stdev**2 + other.stdev**2))

    def __repr__(self) -> str:
        """Return a short description giving the mean and standard deviation."""
        return f"mean {self.mean}, standard deviation {self.stdev}"
Classes
class Gaussian (mu: float = 0.0, sigma: float = 1.0)
-
Class for visualizing data as Gaussian distributions.
The Gaussian, also called Normal, distribution is a continuous probability distribution with probability density function
f(x) = (1/√(2πσ²)) exp(-(x-μ)²/(2σ²))
where
- μ = mu = mean value of the distribution
- σ = sigma = standard deviation of the distribution
Expand source code
class Gaussian(Distribution):
    """Class for visualizing data as Gaussian distributions.

    The Gaussian, also called Normal, distribution is a continuous probability
    distribution with probability density function

    ```
    f(x) = (1/√(2πσ²)) exp(-(x-μ)²/(2σ²))
    ```

    where

    * μ = mu = mean value of the distribution
    * σ = sigma = standard deviation of the distribution

    The methods below read `self.mean`, `self.stdev` and `self.data`; these
    are presumably provided by the `Distribution` base class (confirm there).

    """
    def __init__(self, mu: float=0.0, sigma: float=1.0):
        """Create a Gaussian with mean `mu` and standard deviation `sigma`.

        Raises:
            ValueError: if `sigma` is not strictly positive

        """
        if sigma <= 0:
            msg = 'For a Gaussian distribution, sigma must be greater than 0'
            raise ValueError(msg)

        # mu and sigma are deliberately not stored as separate attributes on
        # this class (unlike p and n for the Binomial class); they are handed
        # straight to the Distribution initializer.
        super().__init__(mu, sigma)

    def plot_histogram_data(self) -> None:
        """Produce a histogram of the data using the matplotlib pyplot library."""
        _, axis = plt.subplots()
        axis.hist(self.data)
        axis.set_title('Histogram of Data')
        axis.set_xlabel('Data')
        axis.set_ylabel('Count')
        plt.show()

    def pdf(self, x: float) -> float:
        """Gaussian prob density function for this Gaussian object.

        Evaluated at `x` using this object's current `mean` and `stdev`.

        """
        c = 1.0/sqrt(2*pi)
        mu = self.mean
        sigma = self.stdev
        return (c/sigma)*exp(-0.5*((x - mu)/sigma)**2)

    def plot_histogram_pdf(self, n_spaces: int = 100) -> tuple[list[float], list[float]]:
        """Plot the normalized histogram of the data and, below it, the
        probability density function over the same range.

        If there is no data, nothing is plotted and two empty lists are
        returned.

        Args:
            n_spaces (int): number of equal-width intervals the data range is
                divided into; the pdf is evaluated at `n_spaces + 1` points.
                Must be at least 1.

        Returns:
            list: x values used for the pdf plot
            list: y values used for the pdf plot

        Raises:
            ValueError: if there is data to plot and `n_spaces` is less than 1

        """
        data = self.data

        # len() rather than truthiness so array-like data also works.
        if len(data) == 0:
            return [], []

        # n_spaces == 0 would divide by zero below and a negative value would
        # silently produce an empty plot, so reject both explicitly.
        if n_spaces < 1:
            raise ValueError('n_spaces must be a positive integer')

        min_x, max_x = min(data), max(data)
        if min_x == max_x:
            # All data points coincide: widen the range so the pdf plot
            # does not collapse to a single point.
            min_x, max_x = min_x - 0.5, max_x + 0.5

        interval = (max_x - min_x)/n_spaces
        pdf = self.pdf
        xs: list[float] = [min_x + interval*n for n in range(n_spaces + 1)]
        ys: list[float] = [pdf(x_val) for x_val in xs]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(xs, ys)
        axes[1].set_title('Normal Distribution for the\n Sample Mean and Sample Standard Deviation')
        axes[1].set_xlabel(f'sample mean = {self.mean}, sample stdev = {self.stdev}')
        axes[1].set_ylabel('Density')
        plt.show()

        return xs, ys

    def __add__(self, other: Gaussian) -> Gaussian:
        """Add together two Gaussian distributions.

        The result has the sum of the two means and a standard deviation of
        sqrt(stdev₁² + stdev₂²), i.e. the variances add.

        Raises:
            TypeError: if `other` is not a Gaussian (subclasses are accepted)

        """
        if not isinstance(other, Gaussian):
            msg = f'A gaussian distribution cannot be added to a {type(other)}'
            raise TypeError(msg)
        return Gaussian(self.mean + other.mean, sqrt(self.stdev**2 + other.stdev**2))

    def __repr__(self) -> str:
        """Return a short description giving the mean and standard deviation."""
        return f"mean {self.mean}, standard deviation {self.stdev}"
Ancestors
Methods
def pdf(self, x: float) ‑> float
-
Gaussian prob density function for this Gaussian object.
Expand source code
def pdf(self, x: float) -> float:
    """Evaluate this Gaussian's probability density at `x`.

    Uses the object's `mean` and `stdev` attributes as μ and σ in

        f(x) = (1/(σ√(2π))) exp(-½((x - μ)/σ)²)

    """
    centre = self.mean
    spread = self.stdev
    # Number of standard deviations between x and the mean.
    z_score = (x - centre)/spread
    norm_const = 1.0/sqrt(2*pi)
    return (norm_const/spread)*exp(-0.5*z_score**2)
def plot_histogram_data(self) ‑> None
-
Produce a histogram of the data using the matplotlib pyplot library.
Expand source code
def plot_histogram_data(self) -> None:
    """Draw and show a matplotlib histogram of `self.data`.

    The plot is titled 'Histogram of Data', with the data values on the
    x axis and the counts on the y axis.

    """
    # Only the axes are needed; the figure object itself is not used.
    hist_axes = plt.subplots()[1]
    hist_axes.hist(self.data)
    hist_axes.set_title('Histogram of Data')
    hist_axes.set_xlabel('Data')
    hist_axes.set_ylabel('Count')
    plt.show()
def plot_histogram_pdf(self, n_spaces: int = 100) ‑> tuple[list[float], list[float]]
-
Plot the normalized histogram of the data together with the probability density function over the same range.
Args
n_spaces
:int
- number of equal-width intervals the data range is divided into; the pdf is evaluated at n_spaces + 1 points
Returns
list
- x values used for the pdf plot
list
- y values used for the pdf plot
Expand source code
def plot_histogram_pdf(self, n_spaces: int = 100) -> tuple[list[float], list[float]]:
    """Plot the normalized histogram of the data and, below it, the
    probability density function over the same range.

    If there is no data, nothing is plotted and two empty lists are
    returned.

    Args:
        n_spaces (int): number of equal-width intervals the data range is
            divided into; the pdf is evaluated at `n_spaces + 1` points.
            Must be at least 1.

    Returns:
        list: x values used for the pdf plot
        list: y values used for the pdf plot

    Raises:
        ValueError: if there is data to plot and `n_spaces` is less than 1

    """
    data = self.data

    # len() rather than truthiness so array-like data also works.
    if len(data) == 0:
        return [], []

    # n_spaces == 0 would divide by zero below and a negative value would
    # silently produce an empty plot, so reject both explicitly.
    if n_spaces < 1:
        raise ValueError('n_spaces must be a positive integer')

    min_x, max_x = min(data), max(data)
    if min_x == max_x:
        # All data points coincide: widen the range so the pdf plot
        # does not collapse to a single point.
        min_x, max_x = min_x - 0.5, max_x + 0.5

    interval = (max_x - min_x)/n_spaces
    pdf = self.pdf
    xs: list[float] = [min_x + interval*n for n in range(n_spaces + 1)]
    ys: list[float] = [pdf(x_val) for x_val in xs]

    # make the plots
    fig, axes = plt.subplots(2, sharex=True)
    fig.subplots_adjust(hspace=.5)
    axes[0].hist(data, density=True)
    axes[0].set_title('Normed Histogram of Data')
    axes[0].set_ylabel('Density')

    axes[1].plot(xs, ys)
    axes[1].set_title('Normal Distribution for the\n Sample Mean and Sample Standard Deviation')
    axes[1].set_xlabel(f'sample mean = {self.mean}, sample stdev = {self.stdev}')
    axes[1].set_ylabel('Density')
    plt.show()

    return xs, ys
Inherited members