Source code for synthetic_aia_mia.mia.rf

"""MIA using a random forest."""

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from functools import partial
import tempfile
import os

from ..fetch_data import Dataset

class MiaRF:
    """Membership inference attack (MIA) model wrapping a sklearn random forest.

    The single feature is the per-sample "loss" column; the label is the
    "member" column. The loss is standardized with a ``StandardScaler``
    fitted on the training data, and the same scaler is applied again at
    prediction time so both phases see features in the same space.
    """

    def __init__(self):
        # Flag checked by predict() to refuse running on an unfitted model.
        self.trained = False

    def fit(self, dadata):
        """Train the random forest.

        :param dadata: Dataset that will be used for training. The loaded
            table must contain a column called "loss" (used as the feature)
            and a column called "member" (used as the label).
        :type dadata: fetch_data.Dataset
        """
        data = dadata.load()[["loss", "member"]]
        # sklearn estimators expect a 2-D (n_samples, n_features) array.
        features = data["loss"].to_numpy().reshape(-1, 1)
        labels = data["member"].to_numpy()

        self.scaler = StandardScaler()
        self.scaler.fit(features)
        # Train directly on the scaler output. The loaded table is left
        # untouched: writing the scaled values back with DataFrame.replace
        # does not substitute a column and can alter unrelated values.
        scaled = self.scaler.transform(features)

        self.model = RandomForestClassifier()
        self.model.fit(scaled, labels)
        self.trained = True

    def predict(self, dadata):
        """Use the trained random forest to predict membership for a dataset.

        :param dadata: Dataset to evaluate. The loaded table must contain a
            column called "loss".
        :type dadata: fetch_data.Dataset
        :return: The input dataset, updated with the hard predictions stored
            in a column called "mia".
        :rtype: fetch_data.Dataset
        :raises AssertionError: If :meth:`fit` has not been called first.
        """
        if not self.trained:
            raise AssertionError(f"{self} must be trained prioir to predict")

        data = dadata.load()
        features = data["loss"].to_numpy().reshape(-1, 1)
        # Apply the scaler fitted in fit() so inference features match the
        # space the forest was trained on.
        data["mia"] = self.model.predict(self.scaler.transform(features))
        # The dataset object is modified through update() and handed back.
        dadata.update(data)
        return dadata