Module grscheller_courses_distributions.binomial
Module for the Binomial class - derived from Udacity exercise template.
Expand source code
# Copyright 2024 Geoffrey R. Scheller
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This module contains software derived from Udacity® exercises.
# Udacity® (https://www.udacity.com/)
#
"""Module for the Binomial class - derived from Udacity exercise template."""
from __future__ import annotations
from typing import Callable, List, Tuple
from math import ceil, comb, floor, pow, sqrt
import matplotlib.pyplot as plt
from .distribution import Distribution
__all__ = ['Binomial']
class Binomial(Distribution):
    """Class for visualizing data as binomial distributions.

    The binomial distribution represents the number of events with
    probability p happening in n trials.

    Attributes (some inherited):

    * mean (float) representing the mean value of the distribution
    * stdev (float) representing the standard deviation of the distribution
    * data extracted from a data file (taken to be a population)
    * p (float) representing the probability of an event occurring
    * n (int) the total number of trials

    """

    def __init__(self, p: float=0.5, n: int=20):
        """Create a binomial distribution from `p` and `n`.

        Args:
            p: probability of success for one trial, `0 <= p <= 1`.
            n: number of trials, at least 1.

        Raises:
            ValueError: if `p` is outside `[0, 1]` or `n < 1`. The message
                names every violated constraint.
        """
        # Collect each violated constraint so the message is never empty;
        # previously n == 0 was rejected without any stated reason.
        problems = []
        if not 0.0 <= p <= 1.0:
            problems.append('0 <= p <= 1')
        if n < 1:
            problems.append('the number of trials n must be positive')
        if problems:
            msg = 'For a binomial distribution, ' + ' and '.join(problems) + '.'
            raise ValueError(msg)

        self.p: float = p  #: probability of a success
        self.n: int = n  #: number of trials
        # p and n must be set first: both calculate_* methods read them.
        super().__init__(self.calculate_mean(), self.calculate_stdev())

    def calculate_mean(self) -> float:
        """Calculate the mean `n*p`, store it in `self.mean` and return it."""
        self.mean = mean = self.n*self.p
        return mean

    def calculate_stdev(self) -> float:
        """Calculate the standard deviation `sqrt(n*p*(1-p))`.

        The result is stored in `self.stdev` and returned.
        """
        n = self.n
        p = self.p
        self.stdev = stdev = sqrt(n*p*(1-p))
        return stdev

    def read_data_file(self, file_name: str, sample: bool=False) -> None:
        """Read data from a file, DOES NOT UPDATE either `p` or `n`.

        * the data is always treated as the population randomly selected
          with replacement
        * the sample parameter is ignored (it is there for LSP)
        * the data is assumed to be one float per line
        * each value either 0.0 or 1.0 (0 or 1 interpreted as floats)

        """
        # Always forward False, whatever the caller passed for `sample`.
        super().read_data_file(file_name, False)

    def replace_stats_with_data(self) -> tuple[float, int]:
        """Recalculate `p`, `n`, `mean` and `stdev` from the read in data set.

        The read in data set is taken as the population. When no data has
        been read in (`self.data` is empty) nothing is changed.

        Returns:
            The (possibly updated) pair `(p, n)`.
        """
        if self.data:
            self.n = n = len(self.data)
            self.p = sum(self.data)/n
            # Reuse the same formulas the constructor uses.
            self.calculate_mean()
            self.calculate_stdev()
        return self.p, self.n

    def plot_bar_data(self) -> None:
        """Produce a bar-graph of the data using the matplotlib pyplot library.

        The two bars show the expected counts `n*(1-p)` (label '0') and
        `n*p` (label '1').
        """
        n = self.n
        p = self.p

        _, axis = plt.subplots()
        axis.bar(('0', '1'), (n*(1-p), n*p), color='maroon', width=0.6)
        axis.set_title('Failures and Successes for a sample of {}'.format(n))
        axis.set_xlabel('prob = {}, n = {}'.format(p, n))
        axis.set_ylabel('Sample Count')
        plt.show()

    def pdf(self, kf: float) -> float:
        """Binomial probability (mass) function for this Binomial object.

        Args:
            kf: number of successes; truncated toward zero with `int()`.

        Returns:
            The probability of exactly `int(kf)` successes in `n` trials,
            or 0.0 when that count lies outside `0..n`.
        """
        k = int(kf)
        n = self.n
        p = self.p
        # Outside the support the probability is zero. Without this guard
        # comb() raises ValueError for k < 0, and 0.0**(n-k) raises
        # ZeroDivisionError when p == 1 and k > n.
        if k < 0 or k > n:
            return 0.0
        return comb(n, k)*(p**k)*(1 - p)**(n-k)

    def plot_bar_pdf(self) -> Tuple[List[int], List[float]]:
        """Function to plot the pdf of the binomial distribution.

        Returns:
            list: x values used for the pdf plot
            list: y values used for the pdf plot

        """
        xs: List[int] = list(range(self.n + 1))
        ys: List[float] = [self.pdf(float(x)) for x in xs]

        # String labels make matplotlib treat the x axis as categorical.
        plt.bar([str(x) for x in xs], ys, color='maroon', width=0.4)
        plt.title('Probability Density of Success')
        plt.xlabel('Successes for {} trials'.format(self.n))
        plt.ylabel('Probability')
        plt.show()

        return xs, ys

    def __add__(self, other: Binomial) -> Binomial:
        """Add together two Binomial distributions with equal p.

        Returns:
            A new Binomial with the shared `p` and `n = self.n + other.n`.

        Raises:
            TypeError: if `other` is not a Binomial.
            ValueError: if the two `p` values differ (exact comparison).
        """
        if not isinstance(other, Binomial):
            msg = 'A binomial distribution cannot be added to a {}'
            raise TypeError(msg.format(type(other)))
        if self.p != other.p:
            raise ValueError('p values are not equal')
        return Binomial(self.p, self.n + other.n)

    def __repr__(self) -> str:
        """Output the characteristics of the Binomial instance.

        Returns a string showing:

        * mean
        * standard deviation
        * p
        * n

        """
        repr_str = 'mean {}, standard deviation {}, p {}, n {}'
        return repr_str.format(self.mean, self.stdev, self.p, self.n)
Classes
class Binomial (p: float = 0.5, n: int = 20)
-
Class for visualizing data as binomial distributions.
The binomial distribution represents the number of events, each occurring with probability p, that happen in n trials.
Attributes (some inherited):
- mean (float) representing the mean value of the distribution
- stdev (float) representing the standard deviation of the distribution
- data extracted from a data file (taken to be a population)
- p (float) representing the probability of an event occurring
- n (int) the total number of trials
Expand source code
class Binomial(Distribution):
    """Class for visualizing data as binomial distributions.

    The binomial distribution represents the number of events with
    probability p happening in n trials.

    Attributes (some inherited):

    * mean (float) representing the mean value of the distribution
    * stdev (float) representing the standard deviation of the distribution
    * data extracted from a data file (taken to be a population)
    * p (float) representing the probability of an event occurring
    * n (int) the total number of trials

    """

    def __init__(self, p: float=0.5, n: int=20):
        """Create a binomial distribution from `p` and `n`.

        Args:
            p: probability of success for one trial, `0 <= p <= 1`.
            n: number of trials, at least 1.

        Raises:
            ValueError: if `p` is outside `[0, 1]` or `n < 1`. The message
                names every violated constraint.
        """
        # Collect each violated constraint so the message is never empty;
        # previously n == 0 was rejected without any stated reason.
        problems = []
        if not 0.0 <= p <= 1.0:
            problems.append('0 <= p <= 1')
        if n < 1:
            problems.append('the number of trials n must be positive')
        if problems:
            msg = 'For a binomial distribution, ' + ' and '.join(problems) + '.'
            raise ValueError(msg)

        self.p: float = p  #: probability of a success
        self.n: int = n  #: number of trials
        # p and n must be set first: both calculate_* methods read them.
        super().__init__(self.calculate_mean(), self.calculate_stdev())

    def calculate_mean(self) -> float:
        """Calculate the mean `n*p`, store it in `self.mean` and return it."""
        self.mean = mean = self.n*self.p
        return mean

    def calculate_stdev(self) -> float:
        """Calculate the standard deviation `sqrt(n*p*(1-p))`.

        The result is stored in `self.stdev` and returned.
        """
        n = self.n
        p = self.p
        self.stdev = stdev = sqrt(n*p*(1-p))
        return stdev

    def read_data_file(self, file_name: str, sample: bool=False) -> None:
        """Read data from a file, DOES NOT UPDATE either `p` or `n`.

        * the data is always treated as the population randomly selected
          with replacement
        * the sample parameter is ignored (it is there for LSP)
        * the data is assumed to be one float per line
        * each value either 0.0 or 1.0 (0 or 1 interpreted as floats)

        """
        # Always forward False, whatever the caller passed for `sample`.
        super().read_data_file(file_name, False)

    def replace_stats_with_data(self) -> tuple[float, int]:
        """Recalculate `p`, `n`, `mean` and `stdev` from the read in data set.

        The read in data set is taken as the population. When no data has
        been read in (`self.data` is empty) nothing is changed.

        Returns:
            The (possibly updated) pair `(p, n)`.
        """
        if self.data:
            self.n = n = len(self.data)
            self.p = sum(self.data)/n
            # Reuse the same formulas the constructor uses.
            self.calculate_mean()
            self.calculate_stdev()
        return self.p, self.n

    def plot_bar_data(self) -> None:
        """Produce a bar-graph of the data using the matplotlib pyplot library.

        The two bars show the expected counts `n*(1-p)` (label '0') and
        `n*p` (label '1').
        """
        n = self.n
        p = self.p

        _, axis = plt.subplots()
        axis.bar(('0', '1'), (n*(1-p), n*p), color='maroon', width=0.6)
        axis.set_title('Failures and Successes for a sample of {}'.format(n))
        axis.set_xlabel('prob = {}, n = {}'.format(p, n))
        axis.set_ylabel('Sample Count')
        plt.show()

    def pdf(self, kf: float) -> float:
        """Binomial probability (mass) function for this Binomial object.

        Args:
            kf: number of successes; truncated toward zero with `int()`.

        Returns:
            The probability of exactly `int(kf)` successes in `n` trials,
            or 0.0 when that count lies outside `0..n`.
        """
        k = int(kf)
        n = self.n
        p = self.p
        # Outside the support the probability is zero. Without this guard
        # comb() raises ValueError for k < 0, and 0.0**(n-k) raises
        # ZeroDivisionError when p == 1 and k > n.
        if k < 0 or k > n:
            return 0.0
        return comb(n, k)*(p**k)*(1 - p)**(n-k)

    def plot_bar_pdf(self) -> Tuple[List[int], List[float]]:
        """Function to plot the pdf of the binomial distribution.

        Returns:
            list: x values used for the pdf plot
            list: y values used for the pdf plot

        """
        xs: List[int] = list(range(self.n + 1))
        ys: List[float] = [self.pdf(float(x)) for x in xs]

        # String labels make matplotlib treat the x axis as categorical.
        plt.bar([str(x) for x in xs], ys, color='maroon', width=0.4)
        plt.title('Probability Density of Success')
        plt.xlabel('Successes for {} trials'.format(self.n))
        plt.ylabel('Probability')
        plt.show()

        return xs, ys

    def __add__(self, other: Binomial) -> Binomial:
        """Add together two Binomial distributions with equal p.

        Returns:
            A new Binomial with the shared `p` and `n = self.n + other.n`.

        Raises:
            TypeError: if `other` is not a Binomial.
            ValueError: if the two `p` values differ (exact comparison).
        """
        if not isinstance(other, Binomial):
            msg = 'A binomial distribution cannot be added to a {}'
            raise TypeError(msg.format(type(other)))
        if self.p != other.p:
            raise ValueError('p values are not equal')
        return Binomial(self.p, self.n + other.n)

    def __repr__(self) -> str:
        """Output the characteristics of the Binomial instance.

        Returns a string showing:

        * mean
        * standard deviation
        * p
        * n

        """
        repr_str = 'mean {}, standard deviation {}, p {}, n {}'
        return repr_str.format(self.mean, self.stdev, self.p, self.n)
Ancestors
Instance variables
var n
-
number of trials
var p
-
probability of a success
Methods
def calculate_mean(self) ‑> float
-
Calculate the mean from p and n
Expand source code
def calculate_mean(self) -> float:
    """Compute the mean `n*p`, cache it on `self.mean` and return it."""
    self.mean = self.n*self.p
    return self.mean
def calculate_stdev(self) ‑> float
-
Calculate the standard deviation using p and n
Expand source code
def calculate_stdev(self) -> float:
    """Compute the standard deviation from `p` and `n`.

    The value `sqrt(n*p*(1-p))` is cached on `self.stdev` and returned.
    """
    self.stdev = sqrt(self.n*self.p*(1-self.p))
    return self.stdev
def pdf(self, kf: float) ‑> float
-
Binomial prob density function for this Binomial object.
Expand source code
def pdf(self, kf: float) -> float:
    """Binomial probability (mass) function for this Binomial object.

    Args:
        kf: number of successes; truncated toward zero with `int()`.

    Returns:
        The probability of exactly `int(kf)` successes in `n` trials,
        or 0.0 when that count lies outside `0..n`.
    """
    k = int(kf)
    n = self.n
    p = self.p
    # Outside the support the probability is zero. Without this guard
    # comb() raises ValueError for k < 0, and 0.0**(n-k) raises
    # ZeroDivisionError when p == 1 and k > n.
    if k < 0 or k > n:
        return 0.0
    return comb(n, k)*(p**k)*(1 - p)**(n-k)
def plot_bar_data(self) ‑> None
-
Produce a bar-graph of the data using the matplotlib pyplot library.
Expand source code
def plot_bar_data(self) -> None:
    """Draw a two-bar chart of failures versus successes with matplotlib.

    Bar '0' has height `n*(1-p)` and bar '1' has height `n*p`; the figure
    is displayed with `plt.show()`.
    """
    trials = self.n
    prob = self.p
    labels = ('0', '1')
    heights = (trials*(1-prob), trials*prob)

    _, axis = plt.subplots()
    axis.bar(labels, heights, color='maroon', width=0.6)
    axis.set_title('Failures and Successes for a sample of {}'.format(trials))
    axis.set_xlabel('prob = {}, n = {}'.format(prob, trials))
    axis.set_ylabel('Sample Count')
    plt.show()
def plot_bar_pdf(self) ‑> Tuple[List[int], List[float]]
-
Function to plot the pdf of the binomial distribution.
Returns
list
- x values used for the pdf plot
list
- y values used for the pdf plot
Expand source code
def plot_bar_pdf(self) -> Tuple[List[int], List[float]]:
    """Plot the pdf of the binomial distribution as a bar chart.

    Returns:
        list: x values used for the pdf plot (0 through n)
        list: y values used for the pdf plot

    """
    xs: List[int] = list(range(self.n + 1))
    ys: List[float] = [self.pdf(float(successes)) for successes in xs]
    labels = [str(successes) for successes in xs]

    plt.bar(labels, ys, color='maroon', width=0.4)
    plt.title('Probability Density of Success')
    plt.xlabel('Successes for {} trials'.format(self.n))
    plt.ylabel('Probability')
    plt.show()

    return xs, ys
def read_data_file(self, file_name: str, sample: bool = False) ‑> None
-
Read data from a file, DOES NOT UPDATE either p or n.
- the data is always treated as the population randomly selected with replacement
- the sample parameter is ignored (it is there for LSP)
- the data is assumed to be one float per line
- each value either 0.0 or 1.0 (0 or 1 interpreted as floats)
Expand source code
def read_data_file(self, file_name: str, sample: bool=False) -> None:
    """Read data from a file, DOES NOT UPDATE either `p` or `n`.

    Delegates to the parent class's `read_data_file`.

    * the data is always treated as the population randomly selected
      with replacement
    * the sample parameter is ignored (it is there for LSP)
    * the data is assumed to be one float per line
    * each value either 0.0 or 1.0 (0 or 1 interpreted as floats)

    """
    # Always forward False, whatever the caller passed for `sample`.
    super().read_data_file(file_name, False)
def replace_stats_with_data(self) ‑> tuple[float, int]
-
Function to calculate p and n from the read in data set.
Where the read in data set is taken as the population.
Expand source code
def replace_stats_with_data(self) -> tuple[float, int]:
    """Derive `p`, `n`, `mean` and `stdev` from the read in data set.

    The read in data set is taken as the population. With no data
    (`self.data` empty) the current values are left untouched.

    Returns:
        The pair `(p, n)` after any update.
    """
    if not self.data:
        return self.p, self.n

    count = len(self.data)
    self.n = count
    prob = sum(self.data)/count
    self.p = prob
    self.mean = count*prob
    self.stdev = sqrt(count*prob*(1-prob))
    return prob, count
Inherited members