Source code for locpix.evaluate.metrics

"""Module for calculating performance metrics"""

from locpix.preprocessing import datastruc
import os
import polars as pl
import numpy as np
import warnings


def metric_calculation(item: datastruc.item, labels):
    """Calculate TP, TN, FP and FN for the dataframe, for each label present

    Args:
        item (datastruc.item): Item containing the dataframe with
            gt and pred labels
        labels (list): List of labels to calculate results for

    Returns:
        dict: Results, where each key is a label and each value is a
            dictionary containing TP, TN, FP, FN
            e.g. {0: {'TP': 0.6, 'FP': 0.3, ...}, 1: {'TP': 0.4, 'FP': 0.2, ...}}
            means we have two labels 0 and 1
    """

    # visualise for checking
    # visualise(gt_df, pred_df, labels)

    results = {}

    # check no duplicates
    if len(item.df) != len(item.df.unique()):
        input(
            "You have duplicates in dataframe - remove this line and \
            replace with value error once appropriate"
        )

    for label in labels:
        tp_df = item.df.filter(
            (pl.col("gt_label") == label) & (pl.col("pred_label") == label)
        )
        TP = len(tp_df)
        fp_df = item.df.filter(
            (pl.col("gt_label") != label) & (pl.col("pred_label") == label)
        )
        FP = len(fp_df)
        tn_df = item.df.filter(
            (pl.col("gt_label") != label) & (pl.col("pred_label") != label)
        )
        TN = len(tn_df)
        fn_df = item.df.filter(
            (pl.col("gt_label") == label) & (pl.col("pred_label") != label)
        )
        FN = len(fn_df)

        results[label] = {"TP": TP, "FP": FP, "TN": TN, "FN": FN}

    return results
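

# Hedged sketch, not part of the module itself: the per-label counts above
# reduce to four polars filters over the "gt_label" and "pred_label" columns.
# The toy dataframe below is hypothetical and only illustrates the filter logic.
#
#   demo = pl.DataFrame({"gt_label": [0, 0, 1, 1], "pred_label": [0, 1, 1, 1]})
#   TP = len(demo.filter((pl.col("gt_label") == 1) & (pl.col("pred_label") == 1)))  # 2
#   FP = len(demo.filter((pl.col("gt_label") != 1) & (pl.col("pred_label") == 1)))  # 1
#   TN = len(demo.filter((pl.col("gt_label") != 1) & (pl.col("pred_label") != 1)))  # 1
#   FN = len(demo.filter((pl.col("gt_label") == 1) & (pl.col("pred_label") != 1)))  # 0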


def mean_metrics(results, labels):
    """Calculate the mean metrics

    Args:
        results (dict): Nested dictionary where each key is a label mapping to
            another dictionary containing the number of TP, TN, FP and FN
            e.g. results[2]["TP"] is the number of true positives for label 2
        labels (list): List of labels

    Returns:
        tuple: Per-label iou, accuracy, recall, precision and f1 lists together
            with the mean iou (miou) and mean accuracy (macc), returned as
            (iou_list, acc_list, recall_list, pr_list, miou, macc, f1_score)
    """

    acc_list = []  # each list will have length = number of labels
    iou_list = []
    recall_list = []
    pr_list = []
    f1_score = []

    for label in labels:
        TP = results[label]["TP"]
        FP = results[label]["FP"]
        TN = results[label]["TN"]
        FN = results[label]["FN"]

        acc_list.append((TP + TN) / (TP + TN + FP + FN))
        iou_list.append(TP / (TP + FP + FN))
        recall = TP / (TP + FN)
        if TP + FP == 0:
            warnings.warn(
                "No positive predictions therefore precision and f1 score set to zero"
            )
            precision = 0
            f1_score.append(0)
        else:
            precision = TP / (TP + FP)
            if precision + recall == 0:
                # guard against zero division when TP == 0 but FP > 0
                f1_score.append(0)
            else:
                f1_score.append((2 * precision * recall) / (precision + recall))
        recall_list.append(recall)
        pr_list.append(precision)

    macc = np.mean(acc_list)
    miou = np.mean(iou_list)

    return iou_list, acc_list, recall_list, pr_list, miou, macc, f1_score
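

# Worked example for the formulas above (hedged sketch using the hypothetical
# counts TP=2, FP=1, TN=1, FN=0 from the toy dataframe sketched earlier):
#   accuracy  = (TP + TN) / (TP + TN + FP + FN) = 3 / 4 = 0.75
#   iou       = TP / (TP + FP + FN)             = 2 / 3 ~ 0.667
#   recall    = TP / (TP + FN)                  = 2 / 2 = 1.0
#   precision = TP / (TP + FP)                  = 2 / 3 ~ 0.667
#   f1        = 2 * precision * recall / (precision + recall) = 0.8
# macc and miou are the unweighted means of the per-label accuracy and iou
# values across all labels.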


def aggregated_metrics(
    files_folder, save_loc, gt_label_map, add_metrics={}, metadata={}
):
    """Calculate metrics over files, where each file is a .parquet file
    representing a datastruc.item

    Args:
        files_folder (string): Folder containing the datastruc.item objects,
            each saved as a .parquet file
        save_loc (string): Where to save the calculated metrics
        gt_label_map (dict): Dictionary where each key is an integer and the
            value is the associated label
        add_metrics (dict): Additional pre-computed metrics to save; must
            contain the precision-recall AUC under the key "pr_auc"
        metadata (dict): Any additional metadata to save

    Returns:
        agg_results (dict): Dictionary containing TP, etc. for labels
    """

    # list items
    try:
        files = os.listdir(files_folder)
    except FileNotFoundError:
        raise ValueError("There should be some files to open")

    # create empty aggregated results dictionary, with each key
    # being a label
    agg_results = {}
    labels = gt_label_map.keys()
    for label in labels:
        agg_results[label] = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}

    # for each file aggregate TP, TN, FP, FN for each class
    for file in files:
        item = datastruc.item(None, None, None, None, None)
        item.load_from_parquet(os.path.join(files_folder, file))

        file_results = metric_calculation(item, labels)

        # add file results to aggregated results
        for label in labels:
            agg_results[label]["TP"] += file_results[label]["TP"]
            agg_results[label]["TN"] += file_results[label]["TN"]
            agg_results[label]["FP"] += file_results[label]["FP"]
            agg_results[label]["FN"] += file_results[label]["FN"]

    # create empty results dictionary
    results = {}

    # calculate macc/miou
    iou_list, acc_list, recall_list, pr_list, miou, macc, f1_score = mean_metrics(
        agg_results, labels
    )
    results["iou_list"] = iou_list
    results["acc_list"] = acc_list
    results["recall_list"] = recall_list
    results["precision_list"] = pr_list
    results["macc"] = macc
    results["miou"] = miou
    results["agg_results"] = agg_results
    results["f1_score"] = f1_score

    # calculate overall accuracy (oacc): total TP over total predictions
    tp_running_total = 0
    fp_running_total = 0
    for label in labels:
        TP = agg_results[label]["TP"]
        FP = agg_results[label]["FP"]
        tp_running_total += TP
        fp_running_total += FP
    oacc = tp_running_total / (tp_running_total + fp_running_total)
    results["oacc"] = oacc

    # calculate aucnpr
    auc = add_metrics["pr_auc"]

    # assume label 1 is positive label
    tp = agg_results[1]["TP"]
    fp = agg_results[1]["FP"]
    tn = agg_results[1]["TN"]
    fn = agg_results[1]["FN"]

    assert agg_results[1]["TP"] == agg_results[0]["TN"]
    assert agg_results[1]["FP"] == agg_results[0]["FN"]
    assert agg_results[1]["TN"] == agg_results[0]["TP"]
    assert agg_results[1]["FN"] == agg_results[0]["FP"]

    # assume label 1 is positive label
    ones = tp + fn
    zeros = fp + tn
    skew = ones / (zeros + ones)
    # aucprmin is the minimum achievable precision-recall AUC for this class
    # skew; aucnpr rescales the PR AUC relative to it
    aucprmin = 1 + ((1 - skew) * np.log(1 - skew)) / skew
    add_metrics["aucnpr"] = (auc - aucprmin) / (1 - aucprmin)

    # save to .csv in output results
    # df = pl.DataFrame(results)
    # df.write_csv(output_results)

    # sklearn to check and also speed
    pred_list = np.array([])
    gt_list = np.array([])
    for file in files:
        item = datastruc.item(None, None, None, None, None)
        item.load_from_parquet(os.path.join(files_folder, file))
        gt = item.df.select(pl.col("gt_label")).to_numpy()
        pred = item.df.select(pl.col("pred_label")).to_numpy()
        gt_list = np.append(gt_list, gt)
        pred_list = np.append(pred_list, pred)

    """
    print("Sklearn results")
    acc = sklearn.metrics.accuracy_score(gt_list, pred_list)
    report = sklearn.metrics.classification_report(gt_list, pred_list)
    confusion_matrix = sklearn.metrics.confusion_matrix(gt_list, pred_list)
    prec = sklearn.metrics.precision_score(gt_list, pred_list)
    recall = sklearn.metrics.recall_score(gt_list, pred_list)
    print("acc", acc)
    print("report", report)
    print("confusion matrix", confusion_matrix)
    print("prec", prec)
    print("recall", recall)
    """

    # create text file and save overall results/configuration to it
    lines = ["Overall results", "-----------"]
    sections = [results, add_metrics, metadata]
    for section in sections:
        for key, value in section.items():
            lines.append(f"{key} : {value}")
    lines.append(f"gt label map: {gt_label_map}")
    with open(save_loc, "w") as f:
        f.write("\n".join(lines))

    return agg_results
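

# Hedged usage sketch, not part of the module itself: one way the aggregation
# above might be driven. The folder, save path, label names, pr_auc value and
# metadata below are hypothetical placeholders; "pr_auc" must be supplied
# because the function reads add_metrics["pr_auc"] when normalising the
# precision-recall AUC, and gt_label_map needs the keys 0 and 1 because
# label 1 is assumed to be the positive label.
if __name__ == "__main__":
    agg = aggregated_metrics(
        files_folder="path/to/predicted_items",  # hypothetical folder of .parquet datastruc.items
        save_loc="path/to/overall_metrics.txt",  # hypothetical output text file
        gt_label_map={0: "background", 1: "structure"},  # hypothetical binary label map
        add_metrics={"pr_auc": 0.9},  # hypothetical pre-computed PR AUC
        metadata={"note": "example run"},  # hypothetical metadata
    )
    print(agg)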