Source code for pyphyschemtools.spectro

############################################################
#                       Absorption spectra
############################################################
from .visualID_Eng import fg, bg, hl, color
from .core import centerTitle, centertxt

import numpy as np
import matplotlib.pyplot as plt
import scipy.constants as sc

import pandas as pd
from scipy import stats
from pathlib import Path

import os

[docs] class SpectrumSimulator: """ Initializes the spectrum simulator Args: - sigma_ev (float): Gaussian half-width at half-maximum in electron-volts (eV). Default is 0.3 eV (GaussView default is 0.4 eV). - plotWH (tuple(int,int)): Width and Height of the matplotlib figures in inches. Default is (12,8). - colorS: color of the simulated spectrum (default ='#3e89be') - colorVT: color of the vertical transition line (default = '#469cd6') Returns: None: This method initializes the instance attributes. Calculates: sigmanm = half-width of the Gaussian band, in nm """ def __init__(self, sigma_ev=0.3, plotWH=(12,8), \ fontSize_axisText=14, fontSize_axisLabels=14, fontSize_legends=12, fontsize_peaks=12, colorS='#3e89be',colorVT='#469cd6' ): self.sigma_ev = sigma_ev # Conversion constante eV -> nm sigma self.ev2nm_const = (sc.h * sc.c) * 1e9 / sc.e self.sigmanm = self.ev2nm_const / self.sigma_ev self.plotW = plotWH[0] self.plotH = plotWH[1] self.colorS = colorS self.colorVT = colorVT self.fig = None self.graph = None self.fontSize_axisText = fontSize_axisText self.fontSize_axisLabels = fontSize_axisLabels self.fontSize_legends = fontSize_legends self.fontsize_peaks = fontsize_peaks print(f"sigma = {sigma_ev} eV -> sigmanm = {self.sigmanm:.1f} nm") def _initializePlot(self): fig, graph = plt.subplots(figsize=(self.plotW,self.plotH)) plt.subplots_adjust(wspace=0) plt.xticks(fontsize=self.fontSize_axisText,fontweight='bold') plt.yticks(fontsize=self.fontSize_axisText,fontweight='bold') return fig, graph def _calc_epsiG(self,lambdaX,lambdai,fi): """ calculates a Gaussian band shape around a vertical transition input: - lambdaX = wavelength variable, in nm - lambdai = vertical excitation wavelength for i_th state, in nm - fi = oscillator strength for state i (dimensionless) output : molar absorption coefficient, in L mol-1 cm-1 """ import scipy.constants as sc import numpy as np c = sc.c*1e2 #cm-1 NA = sc.N_A #mol-1 me = sc.m_e*1000 #g e = sc.e*sc.c*10 #elementary charge in esu pf = np.sqrt(np.pi)*e**2*NA/(1000*np.log(10)*c**2*me) nubarX = 1e7 / lambdaX # nm to cm-1 nubari = 1e7 / lambdai sigmabar = 1e7 / self.sigmanm epsi = pf * (fi / sigmabar) * np.exp(-((nubarX - nubari)/sigmabar)**2) return epsi def _Absorbance(self,eps,opl,cc): """ Calculates the Absorbance with the Beer-Lambert law input: - eps = molar absorption coefficient, in L mol-1 cm-1 - opl = optical path length, in cm - cc = concentration of the attenuating species, in mol.L-1 output : Absorbance, A (dimensionless) """ return eps*opl*cc def _sumStatesWithGf(self,wavel,wavelTAB,feTAB): import numpy as np sumInt = np.zeros(len(wavel)) for l in wavel: for i in range(len(wavelTAB)): sumInt[np.argwhere(l==wavel)[0][0]] += self._calc_epsiG(l,wavelTAB[i],feTAB[i]) return sumInt def _FindPeaks(self,sumInt,height,prom=1): """ Finds local maxima within the spectrum based on height and prominence. Prominence is crucial when switching between linear and logarithmic scales: - In Linear mode: A large prominence (e.g., 1 to 1000) filters out noise. - In Log mode: Data is compressed into a range of ~0 to 5. A large prominence will 'hide' real peaks. A smaller value (0.01 to 0.1) is required to detect shoulders and overlapping bands. Input: - sumInt: Array of intensities (Epsilon or Absorbance). - height: Minimum height a peak must reach to be considered. - prom: Required vertical distance between the peak and its lowest contour line. Returns: - PeakIndex: Indices of the detected peaks in the wavelength array. - PeakHeight: The intensity values at these peak positions. """ from scipy.signal import find_peaks peaks = find_peaks(sumInt, height = height, threshold = None, distance = 1, prominence=prom) PeakIndex = peaks[0] # Check if 'peak_heights' exists in the properties dictionary if 'peak_heights' in peaks[1]: PeakHeight = peaks[1]['peak_heights'] else: # If height=None, we extract values manually from the input data PeakHeight = sumInt[PeakIndex] return PeakIndex,PeakHeight def _FindShoulders(self, data, tP): """ ###not working### Detects shoulders using the second derivative. A shoulder appears as a peak in the negative second derivative. Note on scales: - If ylog is True: data should be log10(sumInt) and tP should be log10(tP). The second derivative on log data is much more sensitive to subtle inflection points in weak transitions (like n -> pi*). - If ylog is False: data is linear sumInt and tP is linear. Returns: - shoulder_idx (ndarray): Array of indices where shoulders were found. - shoulder_heights (ndarray): The intensity values at these positions extracted from the input data. """ import numpy as np # Calculate the second derivative (rate of change of the slope) d2 = np.gradient(np.gradient(data)) # We search for peaks in the opposite of the second derivative (-d2). # A local maximum in -d2 corresponds to a point of maximum curvature # (inflection), which identifies a shoulder. # We use a very low prominence threshold to capture subtle inflections. shoulder_idx, _ = self._FindPeaks(-d2, height=None, prom=0.0001) shoulder_heights = data[shoulder_idx] print(shoulder_idx, shoulder_heights ) return shoulder_idx, shoulder_heights def _pickPeak(self,wavel,peaksIndex,peaksH,color,\ shift=500,height=500,posAnnotation=200, ylog=False): """ Annotates peaks with a small vertical tick and the wavelength value. Adjusts offsets based on whether the plot is in log10 scale or linear. In log mode, peaksH must already be log10 values. """ s=shift h=height a=posAnnotation for i in range(len(peaksIndex)): x = wavel[peaksIndex[i]] y = peaksH[i] if ylog: # In log scale, we use multipliers to keep the same visual distance # 1.1 means "10% above the peak" # Adjust these factors based on your preference y_s = y * 1.1 y_h = y * 1.3 y_a = y * 1.5 self.graph.vlines(x, y_s, y_h, colors=color, linestyles='solid') self.graph.annotate(f"{x:.1f}",xy=(x,y),xytext=(x,y_a),rotation=90,size=self.fontsize_peaks,ha='center',va='bottom', color=color) else: # Classic linear offsets self.graph.vlines(x, y+s, y+s+h, colors=color, linestyles='solid') self.graph.annotate(f"{x:.1f}",xy=(x,y),xytext=(x,y+s+h+a),rotation=90,size=self.fontsize_peaks,ha='center',va='bottom',color=color) return def _setup_axes(self, lambdamin, lambdamax, ymax, ylabel="Absorbance"): self.graph.set_xlabel('wavelength / nm', size=self.fontSize_axisLabels, fontweight='bold', color='#2f6b91') self.graph.set_ylabel(ylabel, size=self.fontSize_axisLabels, fontweight='bold', color='#2f6b91') self.graph.set_xlim(lambdamin, lambdamax) self.graph.set_ylim(0, ymax) self.graph.tick_params(axis='both', labelsize=self.fontSize_axisText,labelcolor='black') for tick in self.graph.xaxis.get_majorticklabels(): tick.set_fontweight('bold') #it is both powerful # (you can specify the type of a specific tick) and annoying for tick in self.graph.yaxis.get_majorticklabels(): tick.set_fontweight('bold')
[docs] def plotTDDFTSpectrum(self,wavel,sumInt,wavelTAB,feTAB,tP,ylog,labelSpectrum,colorS='#0000ff',colorT='#0000cf'): """ Called by plotEps_lambda_TDDFT. Plots a single simulated UV-Vis spectrum, i.e. after gaussian broadening, together with the TDDFT vertical transitions (i.e. plotted as lines) Args: wavel: array of gaussian-broadened wavelengths, in nm sumInt: corresponding molar absorptiopn coefficients, in L. mol-1 cm-1 wavelTAB: wavelength of TDDFT, e.g. discretized, transitions ylog: log plot of epsilon tP: threshold for finding the peaks feTAB: TDDFT oscillator strength for each transition of wavelTAB labelSpectrum: title for the spectrum """ # # --- DEBUG START --- # if ylog: # print(f"\n--- DEBUG LOG MODE ---") # print(f"Max sumInt (linear): {np.max(sumInt):.2f}") # print(f"Max sumInt (log10): {np.log10(max(np.max(sumInt), 1e-5)):.2f}") # # --- DEBUG END --- if ylog: # Apply safety floor to the entire array self.graph.set_yscale('log') ymin_val = 1.0 # Epsilon = 1 else: self.graph.set_yscale('linear') ymin_val = 0 # vertical lines for i in range(len(wavelTAB)): val_eps = self._calc_epsiG(wavelTAB[i],wavelTAB[i],feTAB[i]) self.graph.vlines(x=wavelTAB[i], ymin=ymin_val, ymax=max(val_eps, ymin_val), colors=colorT) self.graph.plot(wavel,sumInt,linewidth=3,linestyle='-',color=colorS,label=labelSpectrum) self.graph.legend(fontsize=self.fontSize_legends) if ylog: # Use log-transformed data and log-transformed threshold # Clipping tP to 1e-5 ensures we don't take log of 0 or negative tPlog = np.log10(max(tP, 1e-5)) # prom=0.05 allows detection of peaks that are close in log-magnitude peaks, peaksH_log = self._FindPeaks(np.log10(np.clip(sumInt, 1e-5, None)), tPlog, prom=0.05) peaksH = 10**peaksH_log # shoulders, shouldersH_log = self._FindShoulders(np.log10(np.clip(sumInt, 1e-5, None)), tPlog) # all_idx = np.concatenate((peaks, shoulders)) # allH_log = np.concatenate((peaksH_log, shouldersH_log)) # allH = 10**allH_log else: peaks, peaksH = self._FindPeaks(sumInt,tP) # shoulders, shouldersH = self._FindShoulders(wavel, sumInt, tP) # all_idx = np.concatenate((peaks, shoulders)) # allH = np.concatenate((peaksH, shouldersH)) self._pickPeak(wavel,peaks,peaksH,colorS,500,500,200,ylog)
[docs] def plotEps_lambda_TDDFT(self,datFile,lambdamin=200,lambdamax=800,\ epsMax=None, titles=None, tP = 10, \ ylog=False,\ save_img=None): """ Plots a TDDFT VUV simulated spectrum (vertical transitions and transitions summed with gaussian functions) between lambdamin and lambdamax The sum of states is done in the range [lambdamin-50, lambdamax+50] nm. Args: datFile: list of pathway/names to "XXX_ExcStab.dat" files generated by 'GParser Gaussian.log -S' lambdamin, lambdamax: plot range epsMax: y axis graph limit titles: list of titles (1 per spectrum plot) tP: threshold for finding the peaks (default = 10 L. mol-1 cm-1) ylog: y logarithmic axis (default: False). save: saves in a png file (300 dpi) if True (default = False) save_img: saves figure in a 300 dpi png file if not None (default), with save_img=full pathway """ import matplotlib.ticker as ticker if self.fig is not None: graph = self.graph fig = self.fig lambdamin = self.lambdamin lambdamax = self.lambdamax epsMax = self.epsMax else: fig, graph = self._initializePlot() graph.set_prop_cycle(None) if self.fig is None: self.fig = fig self.graph = graph self.lambdamin = lambdamin self.lambdamax = lambdamax self.epsMax = epsMax graph.set_xlabel('wavelength / nm',size=self.fontSize_axisLabels,fontweight='bold',color='#2f6b91') graph.set_xlim(lambdamin,lambdamax) graph.xaxis.set_major_locator(ticker.MultipleLocator(50)) # sets a tick for every integer multiple of the base (here 250) within the view interval istate,state,wavel,fe,SSq = np.genfromtxt(datFile,skip_header=1,dtype="<U20,<U20,float,float,<U20",unpack=True) wavel = np.array(wavel) fe = np.array(fe) if wavel.size == 1: wavel = np.array([wavel]) fe = np.array([fe]) wvl = np.arange(lambdamin-50,lambdamax+50,1) sumInt = self._sumStatesWithGf(wvl,wavel,fe) self.plotTDDFTSpectrum(wvl,sumInt,wavel,fe,tP,ylog,titles,self.colorS,self.colorVT) if ylog: graph.set_ylabel('log(molar absorption coefficient / L mol$^{-1}$ cm$^{-1})$',size=self.fontSize_axisLabels,fontweight='bold',color='#2f6b91') graph.set_ylim(1, epsMax * 5 if epsMax else None) else: graph.set_yscale('linear') graph.set_ylabel('molar absorption coefficient / L mol$^{-1}$ cm$^{-1}$',size=self.fontSize_axisLabels,fontweight='bold',color='#2f6b91') graph.set_ylim(0, epsMax if epsMax else np.max(sumInt)*1.18) # Around line 336, replace the save_img logic with this: if save_img is not None: from pathlib import Path save_path = Path(save_img) if save_path.parent: save_path.parent.mkdir(parents=True, exist_ok=True) # This creates the folder! self.fig.savefig(save_path, dpi=300, bbox_inches='tight') plt.show() peaksI, peaksH = self._FindPeaks(sumInt,tP) print(f"{bg.LIGHTREDB}{titles}{bg.OFF}") for i in range(len(peaksI)): print(f"peak {i:3}. {wvl[peaksI[i]]:4} nm. epsilon_max = {peaksH[i]:.1f} L mol-1 cm-1") if ylog: print() # prom=0.05 allows detection of peaks that are close in log-magnitude peaksI, peaksH = self._FindPeaks(np.log10(np.clip(sumInt, 1e-5, None)), np.log10(max(tP, 1e-5)), prom=0.05) for i in range(len(peaksI)): print(f"peak {i:3}. {wvl[peaksI[i]]:4} nm. log10(epsilon_max) = {peaksH[i]:.1f}")
[docs] def plotAbs_lambda_TDDFT(self, datFiles=None, C0=1e-5, lambdamin=200, lambdamax=800, Amax=2.0,\ titles=None, linestyles=[], annotateP=[], tP = 0.1,\ resetColors=False,\ save_img=None): """ Plots a simulated TDDFT VUV absorbance spectrum (transitions summed with gaussian functions) between lambdamin and lambdamax (sum of states done in the range [lambdamin-50, lambdamlax+50] nm) Args: datFiles: list of pathway/name to files generated by 'GParser Gaussian.log -S' C0: list of concentrations needed to calculate A = epsilon x l x c (in mol.L-1) lambdamin, lambdamax: plot range (x axis) Amax: y axis graph limit titles: list of titles (1 per spectrum plot) linestyles: list of line styles(default = "-", i.e. a continuous line) annotateP: list of Boolean (annotate lambda max True or False. Default = True) tP: threshold for finding the peaks (default = 0.1) resetColors (bool): If True, resets the matplotlib color cycle to the first color. This allows different series (e.g., gas phase vs. solvent) to share the same color coding for each molecule across multiple calls. Default: False save: saves in a png file (300 dpi) if True (default = False) save_img: saves figure in a 300 dpi png file if not None (default), with save_img=full pathway """ if self.fig is None: fig, graph = self._initializePlot() self.fig = fig self.graph = graph self.lambdamin = lambdamin self.lambdamax = lambdamax self.Amax = Amax else: graph = self.graph fig = self.fig lambdamin = self.lambdamin lambdamax = self.lambdamax Amax = self.Amax if resetColors: graph.set_prop_cycle(None) if linestyles == []: linestyles = len(datFiles)*['-'] if annotateP == []: annotateP = len(datFiles)*[True] self._setup_axes(lambdamin, lambdamax, self.Amax, ylabel="Absorbance") wvl = np.arange(lambdamin-50,lambdamax+50,1) for f in range(len(datFiles)): istate,state,wavel,fe,SSq = np.genfromtxt(datFiles[f],skip_header=1,dtype="<U20,<U20,float,float,<U20",unpack=True) sumInt = self._sumStatesWithGf(wvl,wavel,fe) Abs = self._Absorbance(sumInt,1,C0[f]) plot=self.graph.plot(wvl,Abs,linewidth=3,linestyle=linestyles[f],label=f"{titles[f]}. TDDFT ($C_0$={C0[f]} mol/L)") peaksI, peaksH = self._FindPeaks(Abs,tP,0.01) if (annotateP[f]): self._pickPeak(wvl,peaksI,peaksH,plot[0].get_color(),0.01,0.04,0.02) print(f"{bg.LIGHTREDB}TDDFT. {titles[f]}{bg.OFF}") for i in range(len(peaksI)): print(f"peak {i:3}. {wvl[peaksI[i]]:4} nm. A = {peaksH[i]:.2f}") self.graph.legend(fontsize=self.fontSize_legends) if save_img is not None: from pathlib import Path save_path = Path(save_img) if save_path.parent: save_path.parent.mkdir(parents=True, exist_ok=True) # This creates the folder! self.fig.savefig(save_path, dpi=300, bbox_inches='tight') return
[docs] def plotAbs_lambda_exp(self, csvFiles, C0, lambdamin=200, lambdamax=800,\ Amax=2.0, titles=None, linestyles=[], annotateP=[], tP = 0.1,\ save_img=None): """ Plots an experimental VUV absorbance spectrum read from a csv file between lambdamin and lambdamax Args: - superpose: False = plots a new graph, otherwise the plot is superposed to a previously created one (probably with plotAbs_lambda_TDDFT()) - csvfiles: list of pathway/name to experimental csvFiles (see examples for the format) - C0: list of experimental concentrations, i.e. for each sample - lambdamin, lambdamax: plot range (x axis) - Amax: graph limit (y axis) - titles: list of titles (1 per spectrum plot) - linestyles: list of line styles(default = "--", i.e. a dashed line) - annotateP: list of Boolean (annotate lambda max True or False. Default = True) - tP: threshold for finding the peaks (default = 0.1) - save: saves in a png file (300 dpi) if True (default = False) - save_img: saves figure in a 300 dpi png file if not None (default), with save_img=full pathway """ if linestyles == []: linestyles = len(csvFiles)*['--'] if annotateP == []: annotateP = len(csvFiles)*[True] if self.fig is not None: graph = self.graph fig = self.fig lambdamin = self.lambdamin lambdamax = self.lambdamax Amax = self.Amax else: fig, graph = self._initializePlot() graph.set_prop_cycle(None) if self.fig is None: self.graph = graph self.fig = fig self.lambdamin = lambdamin self.lambdamax = lambdamax self.Amax = Amax self._setup_axes(lambdamin, lambdamax, self.Amax, ylabel="Absorbance") for f in range(len(csvFiles)): wavel,Abs = np.genfromtxt(csvFiles[f],skip_header=1,unpack=True,delimiter=";") wavel *= 1e9 plot=graph.plot(wavel,Abs,linewidth=3,linestyle=linestyles[f],label=f"{titles[f]}. exp ($C_0$={C0[f]} mol/L)") peaksI, peaksH = self._FindPeaks(Abs,tP,0.01) if (annotateP[f]): self._pickPeak(wavel,peaksI,peaksH,plot[0].get_color(),0.01,0.04,0.02) print(f"{bg.LIGHTREDB}exp. {titles[f]}{bg.OFF}") for i in range(len(peaksI)): print(f"peak {i:3}. {wavel[peaksI[i]]:4} nm. A = {peaksH[i]:.2f}") graph.legend(fontsize=self.fontSize_legends) if save_img is not None: from pathlib import Path save_path = Path(save_img) if save_path.parent: save_path.parent.mkdir(parents=True, exist_ok=True) # This creates the folder! self.fig.savefig(save_path, dpi=300, bbox_inches='tight') return
[docs] class QuantitativeAnalysis: """ A comprehensive tool for chemical analysis calibration (e.g., Chromatography, Spectroscopy). This class handles data ingestion from multiple sources, performs linear regression, provides statistical diagnostics (R², LOD, LOQ), and generates publication-quality plots including residual analysis and confidence intervals. Attributes ---------- x : numpy.ndarray Independent variable data (usually concentration or amount). y : numpy.ndarray Dependent variable data (usually peak area, absorbance, or signal). x_label : str Label for the x-axis used in plotting. y_label : str Label for the y-axis used in plotting. model : scipy.stats._stats_mstats_common.LinregressResult or None Storage for the regression parameters after fitting. """ def __init__(self, x=None, y=None, x_label="Concentration", y_label="Area"): self.x = np.array(x) if x is not None else None self.y = np.array(y) if y is not None else None self.x_label = x_label self.y_label = y_label self.model = None
[docs] @classmethod def from_excel(cls, file_path, x_col=1, y_col=2, sheet_name=0): """ Classmethod to create a QuantitativeAnalysis instance from an Excel or ODS file. Parameters ---------- file_path : str or Path Path to the .xlsx or .ods file. x_col : int or str, optional Column number (starting at 1) or Name (str) for X. Default is 1. y_col : int or str, optional Column number (starting at 1) or Name (str) for Y. Default is 2. sheet_name : str or int, optional The specific sheet to read from. Default is 0 (the first sheet). """ import pandas as pd from pathlib import Path path = Path(file_path) extension = path.suffix.lower() # Selecting the engine based on file extension engine = 'odf' if extension == '.ods' else None print(engine) try: df = pd.read_excel(path, sheet_name=sheet_name, engine=engine) # Handle 1-based indexing for integers, leave strings as is ix = x_col - 1 if isinstance(x_col, int) else x_col iy = y_col - 1 if isinstance(y_col, int) else y_col # Extract data and labels # .iloc for integer positions, direct access for names x_data = df.iloc[:, ix] if isinstance(ix, int) else df[ix] y_data = df.iloc[:, iy] if isinstance(iy, int) else df[iy] x_label = df.columns[ix] if isinstance(ix, int) else ix y_label = df.columns[iy] if isinstance(iy, int) else iy # Return a new instance of the class return cls(x=x_data, y=y_data, x_label=x_label, y_label=y_label) print(x_data, y_data, x_label, y_label) except ImportError as e: missing_pkg = "odfpy" if engine == "odf" else "openpyxl" raise ImportError( f"Missing dependency: install '{missing_pkg}' to read {extension} files." ) from e except Exception as e: raise ValueError(f"Error reading file '{file_path}': {e}")
[docs] def fit_linear(self): """ Perform ordinary least squares (OLS) linear regression and compute analytical diagnostics. This method calculates the best-fit line (y = ax + b) for the calibration data. It computes standard statistical metrics (Slope, Intercept, R²) and specific analytical chemistry performance indicators (MAE, LOD, LOQ) to evaluate the method's sensitivity and reliability. The results are printed to the console in a formatted table and stored within the `self.diagnostics` dictionary for programmatic access. Attributes ---------- model : scipy.stats._stats_mstats_common.LinregressResult The full regression result object containing slope, intercept, rvalue, pvalue, and stderr. diagnostics : dict A dictionary containing key analytical performance indicators: * 'slope_err': Standard error of the slope. * 'intercept_err': Standard error of the intercept. * 'r_squared': Coefficient of determination. * 'mae': Mean Absolute Error. * 'lod': Limit of Detection. * 'loq': Limit of Quantification. * 'sy_x': Residual standard deviation. Notes ----- The following diagnostics are calculated: * **MAE (Mean Absolute Error)**: Average of the absolute residuals, expressed in the same units as the signal (y). * **LOD (Limit of Detection)**: The lowest concentration detectable with statistical significance ($3 \sigma / \text{slope}$). * **LOQ (Limit of Quantification)**: The lowest concentration that can be quantified with acceptable precision ($10 \sigma / \text{slope}$). * **sy_x**: Residual standard deviation, used as an estimate for experimental noise. Example ------- >>> calib = Calibration(conc, area) >>> results = calib.fit_linear() >>> print(calib.diagnostics['lod']) """ # Core regression res = stats.linregress(self.x, self.y) self.model = res slope_err = res.stderr intercept_err = res.intercept_stderr # Predictions and Residuals y_pred = res.slope * self.x + res.intercept residuals = self.y - y_pred # Diagnostics calculations r_squared = res.rvalue**2 mae = np.mean(np.abs(residuals)) # LOD/LOQ calculation (Standard Error of the Estimate) n = len(self.x) sy_x = np.sqrt(np.sum(residuals**2) / (n - 2)) lod = (3 * sy_x) / res.slope loq = (10 * sy_x) / res.slope # Store diagnostics and results self.diagnostics = { 'slope_err': slope_err, 'intercept_err': intercept_err, 'r_squared': r_squared, 'mae': mae, 'lod': lod, 'loq': loq, 'sy_x': sy_x } # --- FORMATTED TABLE OUTPUT --- header = f"{'Parameter':<15} | {'Value':<25} | {'Unit/Note':<15}" sep = "-" * len(header) slope_str = f"{res.slope:.4e} ± {slope_err:.4e}" intercept_str = f"{res.intercept:.4e} ± {intercept_err:.4e}" print(f"\nREGRESSION REPORT: {self.y_label} = f({self.x_label})") print(sep) print(header) print(sep) # On utilise maintenant les versions "_str" préparées print(f"{'Slope':<15} | {slope_str:<25} | (Sensitivity)") print(f"{'Intercept':<15} | {intercept_str:<25} | ({self.y_label})") print(f"{'R²':<15} | {r_squared:<25.5f} | (Linearity)") print(f"{'MAE':<15} | {mae:<25.4e} | ({self.y_label})") print(f"{'LOD':<15} | {lod:<25.4e} | ({self.x_label})") print(f"{'LOQ':<15} | {loq:<25.4e} | ({self.x_label})") print(sep + "\n")
[docs] def plot_calibration(self, save_img=None, figsize=(20,20), scientific=True, marker="o", color_marker="#780000", color_fit="#0b78b3", linestyle="-", fontsize=12): """ Generates a diagnostic plot featuring the calibration curve and residual analysis. The figure consists of two subplots: 1. Top Panel: Regression Line & Uncertainty Displays the experimental data points (x, y), the calculated OLS regression line, and a 95% Prediction Interval. The interval is calculated as 1.96 * sigma, where sigma (sy_x) is the Standard Deviation of the Residuals. 2. Bottom Panel: Residual Analysis Plots the residuals (y_exp - y_calc) against concentration. This panel is essential for checking homoscedasticity. A random distribution around the zero-line validates the linear model, whereas a structured pattern (e.g., U-shape) indicates non-linearity. Parameters ---------- save_img : str, optional Path or filename to save the figure (e.g., 'plot.png', 'report.svg'). If the file exists, a manual confirmation will be requested. Vectorial formats (.svg, .pdf) are recommended for publications. figsize : tuple, default (20, 20) Width and height of the figure, in cm. scientific : bool, default True If True, forces scientific notation (e.g., 1e+05) on the Y-axis. marker : str, default "o" Style of the experimental data points. color_marker : str, default "#780000" Color of the experimental scatter points. color_fit : str, default "#0b78b3" Color of the regression line and the prediction interval. linestyle : str, default "-" Line style for the linear fit. fontsize : int, default 12 Base font size for all text elements in the figure. The titles are set to `fontsize + 2`, while the axis labels use `fontsize`. The legend and tick labels are automatically scaled to `fontsize - 2` and `fontsize - 1` respectively. Raises ------ ValueError If the method is called before fitting the model with `fit_linear()`. Notes ----- The 95% Prediction Interval represents the range within which a new single observation is expected to fall. It is wider than a Confidence Interval as it accounts for both the model uncertainty and the inherent variance (sigma) of the residuals. """ from matplotlib.ticker import ScalarFormatter if self.model is None: raise ValueError("Model not fitted. Call fit_linear() before plotting.") width_in = figsize[0] / 2.54 height_in = figsize[1] / 2.54 # 1. Prepare data x = self.x y = self.y y_pred = self.model.slope * x + self.model.intercept residuals = y - y_pred # 2. Setup Figure (2 rows, 1 column) fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(width_in, height_in), gridspec_kw={'height_ratios': [3, 1]}) # --- TOP PANEL: Regression & Uncertainty --- ax1.scatter(x, y, color=color_marker, marker=marker, label='Experimental Data') ax1.plot(x, y_pred, color=color_fit, linestyle=linestyle, label=f'Linear Fit (R²={self.diagnostics["r_squared"]:.5f})') # Visualizing Uncertainty: Prediction Interval (approximate 95%) # Using the residual standard deviation (sy_x) stored in diagnostics for ax in [ax1, ax2]: if scientific: formatter = ScalarFormatter(useMathText=True) formatter.set_scientific(True) formatter.set_powerlimits((-3, 3)) ax.yaxis.set_major_formatter(formatter) formatter_x = ScalarFormatter(useMathText=True) formatter_x.set_scientific(True) formatter_x.set_powerlimits((-3, 3)) ax.xaxis.set_major_formatter(formatter_x) # Ensure the exponent label (e.g., x10^6) matches your style ax.yaxis.get_offset_text().set_fontsize(fontsize - 2) ax.yaxis.get_offset_text().set_fontweight('bold') ax.xaxis.get_offset_text().set_fontsize(fontsize - 2) ax.xaxis.get_offset_text().set_fontweight('bold') else: # Standard scalar formatting (no scientific notation) ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=False)) ax.tick_params(axis='both', labelsize=fontsize - 1) for label in ax.get_xticklabels() + ax.get_yticklabels(): label.set_fontweight('bold') sy_x = self.diagnostics['sy_x'] prediction_interval = 1.96 * sy_x ax1.fill_between(x, y_pred - prediction_interval, y_pred + prediction_interval, color=color_fit, alpha=0.1, label='95% Prediction Interval') ax1.set_ylabel(self.y_label, fontsize=fontsize) ax1.set_title(f"Calibration Tool: {self.y_label} vs {self.x_label}") ax1.legend(loc='best', fontsize=fontsize-2) ax1.grid(True, linestyle=':', alpha=0.6) # --- BOTTOM PANEL: Residuals --- ax2.scatter(x, residuals, color=color_marker, marker=marker, edgecolor='white', s=50) ax2.axhline(y=0, color='black', linestyle='--', linewidth=1) ax2.set_ylabel("Residuals", fontsize=fontsize) ax2.set_xlabel(self.x_label, fontsize=fontsize) ax2.grid(True, linestyle=':', alpha=0.6) plt.tight_layout() if save_img: save_path = Path(save_img) # Convert string to Path object # 1. Check if the file already exists if save_path.exists(): # Prompt the user for confirmation print(f"{color.RED}{hl.BOLD}⚠️ File '{save_img}' already exists. Overwrite? (y/n): {color.OFF}", end="") response = input().lower().strip() if response != 'y': print("💾 Save cancelled. Plot not saved.") plt.show() return # 2. Security: ensure the target directory exists # save_path.parent gives the directory containing the file save_path.parent.mkdir(parents=True, exist_ok=True) # 3. Universal save plt.savefig(save_path, dpi=300, bbox_inches='tight') print(f"💾 Plot saved as: {hl.BOLD}{save_img}{color.OFF}") plt.show()
[docs] def predict(self, signals, sample_name="Sample"): """ Prediction of concentration statistics from analytical signals. Calculates the mean and standard deviation of the provided signals, applies the inverse linear model, prints a formatted analysis report, and store values in attributes. Parameters ---------- signals : float or list/numpy.ndarray The analytical signal(s) measured (e.g., Absorbance, Area). sample_name : str, optional A name for the sample for the report (default is "Sample"). Notes ----- This method updates the following instance attributes: * **mean_conc**: The calculated mean concentration. * **std_conc**: The estimated uncertainty of the concentration. * **mean_signal**: The average value of the input signals. """ if self.model is None: raise ValueError("Model not fitted. Call fit_linear() first.") # Convert to numpy array for easy math signals = np.atleast_1d(signals) mean_signal = np.mean(signals) std_signal = np.std(signals, ddof=1) if len(signals) > 1 else 0 # Inversion of the equation: x = (y - intercept) / slope conc = (mean_signal - self.model.intercept) / self.model.slope # Simple uncertainty estimation (based on calibration noise sy_x) # For a more rigorous approach, one would use the Working-Hotelling formula sy_x = self.diagnostics['sy_x'] conc_err = (sy_x / self.model.slope) / np.sqrt(len(signals)) # Display result print(f"\n--- ANALYSIS REPORT: {sample_name} ---") if len(signals) > 1: print(f"Signals measured: {len(signals)} replicates") print(f"Mean Signal: {mean_signal:.4e} ± {std_signal:.4e}") else: print(f"Signal measured: {mean_signal:.4e}") print(f"RESULT: {conc:.4e} ± {conc_err:.4e} {self.x_label}") print("-" * 35) self.mean_conc = conc self.std_conc = conc_err self.mean_signal = mean_signal