Source code for thermosteam.properties.utils

# -*- coding: utf-8 -*-
'''
All data and methods for the internal retrical of data from CAS numbers.
'''

from bisect import bisect_left
import os
import pandas as pd
import numpy as np
from typing import Dict


__all__ = ('CASDataReader', 
           'CASDataSource', 
           'get_from_retrievers',
           'MultiCheb1D',
           'CAS2int', 
           'int2CAS', 
           'to_nums')

class CASDataReader:
    
    def __init__(self, __file__, folder, parent="Data"):
        fpath = os.path
        join = fpath.join
        parent_path = join(fpath.dirname(__file__), parent)
        self.folder = join(parent_path, folder)
    
    def __call__(self, file, sep='\t', index_col=0,  **kwargs):
        df = pd.read_csv(os.path.join(self.folder, file),
                         sep=sep, index_col=index_col,
                         engine='python', **kwargs)
        return CASDataSource(df)
    

class CASDataSource:
    __slots__ = ('df', 'index', 'values')
    def __init__(self, df):
        self.df = df
        self.index = df.index
        self.values = df.values
    
    def retriever(self, var):
        return CASDataRetriever(self.df, var)
    
    def retrieve(self, CASRN, var):
        df = self.df
        if CASRN in df.index:
            value = df.at[CASRN, var]
            return None if np.isnan(value) else float(value)
        else:
            return None    
    
    @property
    def at(self):
        return self.df.at
    
    def __getitem__(self, CAS):
        return self.values[self.index.get_loc(CAS)]
    
    def __contains__(self, CAS):
        return CAS in self.index
    

class CASDataRetriever:
    __slots__ = ('df', 'var')
    
    def __init__(self, df, var):
        self.df = df
        self.var = var
        
    def __call__(self, CASRN):
        df = self.df
        if CASRN in df.index:
            value = df.at[CASRN, self.var]
            return None if np.isnan(value) else float(value)
        else:
            return None

CASDataSources = Dict[str, CASDataSource]
CASDataRetrievers = Dict[str, CASDataRetriever]

def retrievers_from_data_sources(sources: CASDataSources, var: str) -> dict:
    return {name: source.retriever(var)
            for name, source in sources.items()}

def get_from_retrievers(retrievers: CASDataRetrievers, CASRN: str, method:str):
    if method == 'All':
        value = {i: j(CASRN) for i,j in retrievers.items()}
    elif method == 'Any':
        for retriever in retrievers.values():
            value = retriever(CASRN)
            if value: break
    else:
        try:
            retriever = retrievers[method]
        except:
            raise ValueError("invalid method; method must be one of the following: "
                            f"{', '.join(retrievers)}.")
        value = retriever(CASRN)
    return value

def get_from_data_sources(sources: CASDataSources, CASRN: str, var: str, method:str):
    if method == 'All':
        value = {i: j.retrieve(CASRN, var) for i,j in sources.items()}
    elif method == 'Any':
        for source in sources.values():
            value = source.retrieve(CASRN, var)
            if value: break
    else:
        try:
            source = sources[method]
        except:
            raise ValueError("invalid method; method must be one of the following: "
                            f"{', '.join(sources)}.")
        value = source.retrieve(CASRN, var)
    return value


[docs]class MultiCheb1D(object): '''Simple class to store set of coefficients for multiple chebyshev approximations and perform calculations from them. ''' def __init__(self, points, coeffs): self.points = points self.coeffs = coeffs self.N = len(points)-1 def __call__(self, x): coeffs = self.coeffs[bisect_left(self.points, x)] return coeffs(x) # return self.chebval(x, coeffs) @staticmethod def chebval(x, c): # copied from numpy's source, slightly optimized # https://github.com/numpy/numpy/blob/v1.13.0/numpy/polynomial/chebyshev.py#L1093-L1177 x2 = 2.*x c0 = c[-2] c1 = c[-1] for i in range(3, len(c) + 1): tmp = c0 c0 = c[-i] - c1 c1 = tmp + c1*x2 return c0 + c1*x
[docs]def CAS2int(i): r'''Converts CAS number of a compounds from a string to an int. This is helpful when storing large amounts of CAS numbers, as their strings take up more memory than their numerical representational. All CAS numbers fit into 64 bit ints. Parameters ---------- CASRN : string CASRN [-] Returns ------- CASRN : int CASRN [-] Notes ----- Accomplishes conversion by removing dashes only, and then converting to an int. An incorrect CAS number will change without exception. Examples -------- >>> CAS2int('7704-34-9') 7704349 ''' return int(i.replace('-', ''))
[docs]def int2CAS(i): r'''Converts CAS number of a compounds from an int to an string. This is helpful when dealing with int CAS numbers. Parameters ---------- CASRN : int CASRN [-] Returns ------- CASRN : string CASRN [-] Notes ----- Handles CAS numbers with an unspecified number of digits. Does not work on floats. Examples -------- >>> int2CAS(7704349) '7704-34-9' ''' i = str(i) return i[:-3]+'-'+i[-3:-1]+'-'+i[-1]
[docs]def to_nums(values): r'''Legacy function to turn a list of strings into either floats (if numeric), stripped strings (if not) or None if the string is empty. Accepts any numeric formatting the float function does. Parameters ---------- values : list list of strings Returns ------- values : list list of floats, strings, and None values [-] Examples -------- >>> to_num(['1', '1.1', '1E5', '0xB4', '']) [1.0, 1.1, 100000.0, '0xB4', None] ''' return [to_num(i) for i in values]
def to_num(value): try: return float(value) except: if value == '': return None else: return value.strip()