Module grscheller_courses_distributions.distribution

Module for base class of classes representing probability distributions

Expand source code
# Copyright 2024 Geoffrey R. Scheller
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This module contains software derived from Udacity® exercises.
# Udacity® (https://www.udacity.com/)
#

"""Module for base class of classes representing probability distributions"""

from __future__ import annotations

import math

__all__ = ['Distribution']

class Distribution():
    """Base Class for calculating and visualizing probability distributions."""

    def __init__(self, mean: float, stdev: float):
        self.mean: float = mean      #: mean of the distribution
        self.stdev: float = stdev    #: standard deviation of the distribution``
        self.data: list[float] = []  #: data determining parameters of the distribution
        self.is_sample = False       #: whether data is a sample or the entire population

    def read_data_file(self, file_name: str, sample: bool=True) -> None:
        """Method to read in data from a text file.

        The text file should have

        * one number (float) per line
        * the numbers are stored in the data attribute
        * the mean attribute is then calculated from the data
        * if `sample` true (default), calculate a sample stdev
        * if `sample` false, calculate calculate population stdev
        """
        self.is_sample = sample

        # Read in the data from the file given
        data_list: list[float] = []
        with open(file_name) as file:
            line = file.readline()
            while line:
                data_list.append(float(line))
                line = file.readline()

        self.data = data_list
        # self.calc_data_stats(sample)

    def calc_data_stats(self, sample: bool) -> None:
        """Calculate data statistics (mean/stdev for now)

        Note: Not used for course
        """
        self.is_sample = sample
        self.calculate_stdev(sample)
        # TODO: add other statistics? Maybe median, mode, other moments?

    def calculate_mean(self) -> float:
        """From the data set, calculate & return the mean if it exists."""
        n = len(self.data)
        if n > 0:
            self.mean = sum(self.data)/n
        return self.mean

    def calculate_stdev(self, sample: bool=True) -> float:
        """From the data set, calculate & return the stdev if it exists.

        * If sample is True, calculate a sample standard deviation.
        * If sample is False, calculate a population standard deviation.

        """
        # NOTE: Retaining sample parameter to keep consistent with course's API,
        #       otherwise I don't need it and could do things more cleanly.
        self.is_sample = sample
        mu = self.calculate_mean()
        n = len(self.data)

        if sample:
            # sample standard deviation
            if n > 1:
                self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/(n-1))
        else:
            # population standard deviation
            if n > 0:
                self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/n)

        return self.stdev

Classes

class Distribution (mean: float, stdev: float)

Base Class for calculating and visualizing probability distributions.

Expand source code
class Distribution():
    """Base Class for calculating and visualizing probability distributions."""

    def __init__(self, mean: float, stdev: float):
        self.mean: float = mean      #: mean of the distribution
        self.stdev: float = stdev    #: standard deviation of the distribution``
        self.data: list[float] = []  #: data determining parameters of the distribution
        self.is_sample = False       #: whether data is a sample or the entire population

    def read_data_file(self, file_name: str, sample: bool=True) -> None:
        """Method to read in data from a text file.

        The text file should have

        * one number (float) per line
        * the numbers are stored in the data attribute
        * the mean attribute is then calculated from the data
        * if `sample` true (default), calculate a sample stdev
        * if `sample` false, calculate calculate population stdev
        """
        self.is_sample = sample

        # Read in the data from the file given
        data_list: list[float] = []
        with open(file_name) as file:
            line = file.readline()
            while line:
                data_list.append(float(line))
                line = file.readline()

        self.data = data_list
        # self.calc_data_stats(sample)

    def calc_data_stats(self, sample: bool) -> None:
        """Calculate data statistics (mean/stdev for now)

        Note: Not used for course
        """
        self.is_sample = sample
        self.calculate_stdev(sample)
        # TODO: add other statistics? Maybe median, mode, other moments?

    def calculate_mean(self) -> float:
        """From the data set, calculate & return the mean if it exists."""
        n = len(self.data)
        if n > 0:
            self.mean = sum(self.data)/n
        return self.mean

    def calculate_stdev(self, sample: bool=True) -> float:
        """From the data set, calculate & return the stdev if it exists.

        * If sample is True, calculate a sample standard deviation.
        * If sample is False, calculate a population standard deviation.

        """
        # NOTE: Retaining sample parameter to keep consistent with course's API,
        #       otherwise I don't need it and could do things more cleanly.
        self.is_sample = sample
        mu = self.calculate_mean()
        n = len(self.data)

        if sample:
            # sample standard deviation
            if n > 1:
                self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/(n-1))
        else:
            # population standard deviation
            if n > 0:
                self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/n)

        return self.stdev

Subclasses

Instance variables

var data

data determining parameters of the distribution

var is_sample

whether data is a sample or the entire population

var mean

mean of the distribution

var stdev

standard deviation of the distribution``

Methods

def calc_data_stats(self, sample: bool) ‑> None

Calculate data statistics (mean/stdev for now)

Note: Not used for course

Expand source code
def calc_data_stats(self, sample: bool) -> None:
    """Calculate data statistics (mean/stdev for now)

    Note: Not used for course
    """
    self.is_sample = sample
    self.calculate_stdev(sample)
    # TODO: add other statistics? Maybe median, mode, other moments?
def calculate_mean(self) ‑> float

From the data set, calculate & return the mean if it exists.

Expand source code
def calculate_mean(self) -> float:
    """From the data set, calculate & return the mean if it exists."""
    n = len(self.data)
    if n > 0:
        self.mean = sum(self.data)/n
    return self.mean
def calculate_stdev(self, sample: bool = True) ‑> float

From the data set, calculate & return the stdev if it exists.

  • If sample is True, calculate a sample standard deviation.
  • If sample is False, calculate a population standard deviation.
Expand source code
def calculate_stdev(self, sample: bool=True) -> float:
    """From the data set, calculate & return the stdev if it exists.

    * If sample is True, calculate a sample standard deviation.
    * If sample is False, calculate a population standard deviation.

    """
    # NOTE: Retaining sample parameter to keep consistent with course's API,
    #       otherwise I don't need it and could do things more cleanly.
    self.is_sample = sample
    mu = self.calculate_mean()
    n = len(self.data)

    if sample:
        # sample standard deviation
        if n > 1:
            self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/(n-1))
    else:
        # population standard deviation
        if n > 0:
            self.stdev = math.sqrt(sum(((x - mu)**2 for x in self.data))/n)

    return self.stdev
def read_data_file(self, file_name: str, sample: bool = True) ‑> None

Method to read in data from a text file.

The text file should have

  • one number (float) per line
  • the numbers are stored in the data attribute
  • the mean attribute is then calculated from the data
  • if sample true (default), calculate a sample stdev
  • if sample false, calculate calculate population stdev
Expand source code
def read_data_file(self, file_name: str, sample: bool=True) -> None:
    """Method to read in data from a text file.

    The text file should have

    * one number (float) per line
    * the numbers are stored in the data attribute
    * the mean attribute is then calculated from the data
    * if `sample` true (default), calculate a sample stdev
    * if `sample` false, calculate calculate population stdev
    """
    self.is_sample = sample

    # Read in the data from the file given
    data_list: list[float] = []
    with open(file_name) as file:
        line = file.readline()
        while line:
            data_list.append(float(line))
            line = file.readline()

    self.data = data_list
    # self.calc_data_stats(sample)