Module PdmContext.utils.simulate_stream
Expand source code
import pandas as pd
class simulate_stream():
    """
    Iterator that replays time series samples and events as one stream of records.

    Every record is a dict with the keys ``name``, ``type``, ``timestamp`` and ``value``.
    Records are ordered by timestamp; among the records that share a timestamp, the one
    belonging to ``targetname`` is always emitted last.
    """

    def __init__(self, TimeSeries: list[tuple[str, list[float], list[pd.Timestamp]]], Events: list[tuple[str, list[pd.Timestamp], str]], targetname):
        """
        This method simulates the traffic of the data

        **Parameters**:

        **TimeSeries**: A list of time series data in form of (name, list with data, list of timestamp)

        **Events**: A list of events in form of (name, list of timestamp when the event occurred, type of the Event (isolated or configuration)) as expected in ContextGenerator.

        **targetname**: Name of the series (or event) whose record is emitted last within each timestamp.
        """
        # Build one long-format table with columns dt, name, value, type.
        dict_df = {
            "dt": [],
            "name": [],
            "value": [],
            "type": [],
        }
        for name, data, times in TimeSeries:
            # zip() stops at the shorter of data/times, so unmatched trailing items are ignored.
            for sample, stamp in zip(data, times):
                dict_df["dt"].append(stamp)
                dict_df["name"].append(name)
                dict_df["value"].append(sample)
                dict_df["type"].append("Univariate")
        for name, times, event_type in Events:
            for stamp in times:
                dict_df["dt"].append(stamp)
                dict_df["name"].append(name)
                dict_df["value"].append(None)  # events carry no measurement
                dict_df["type"].append(event_type)

        self.df = pd.DataFrame(dict_df)
        # A stable sort keeps the insertion order of rows that share a timestamp,
        # so the resulting stream is deterministic.
        self.df.sort_values(by="dt", kind="mergesort", inplace=True)
        # The frame is already sorted, so unique() returns the dates in ascending order.
        self.uniquedates = self.df["dt"].unique()
        self.targetname = targetname
        self.liststream = []
        self.current = 0

        columns = ["dt", "name", "value", "type"]
        # One pass over the timestamp groups; sort=False keeps the (sorted) row order.
        for _, rows in self.df.groupby("dt", sort=False):
            target_record = None
            for stamp, name, value, kind in rows[columns].itertuples(index=False, name=None):
                if name == self.targetname:
                    # Hold the target back so it is appended after every other record
                    # of this timestamp. If the target occurs several times at the
                    # same timestamp only its last row is kept.
                    target_record = {"name": name, "type": kind, "timestamp": stamp, "value": value}
                else:
                    self.liststream.append({
                        "name": name,
                        "type": kind,
                        "timestamp": stamp,
                        # Non-univariate rows (events) are always emitted with value None.
                        "value": value if kind == "Univariate" else None,
                    })
            if target_record is not None:
                self.liststream.append(target_record)

    def __iter__(self):
        """Return the stream itself; iteration continues from the current position."""
        return self

    def __next__(self):
        """Return the next record of the stream, raising StopIteration when it is exhausted."""
        if self.current >= len(self.liststream):
            raise StopIteration
        record = self.liststream[self.current]
        # Advance only after reading, otherwise the first record would be skipped.
        self.current += 1
        return record
def simulate_from_df(df: pd.DataFrame, eventTypes: [(str, str)], target_name: str):
    '''
    Build a simulated stream from a DataFrame.

    **Parameters**:

    **df**: Dataframe with the data to simulate stream, the index has to be of Date type.

    **eventTypes**: Which columns represent events and of what type, for example [("column1","isolated"),("column3","configuration")]

    **target_name**: the name of the target data

    **return**: an iterator (a simulate_stream instance)
    '''
    events = []
    event_columns = []
    for column, event_type in eventTypes:
        # An event occurs at every index entry where its column equals 1.
        occurrences = list(df[df[column] == 1].index)
        events.append((column, occurrences, event_type))
        event_columns.append(column)

    # Every column that is not an event column is treated as a time series.
    signals = df.drop(event_columns, axis=1) if event_columns else df

    series = [
        (column, signals[column].values, list(signals.index))
        for column in signals.columns
    ]
    return simulate_stream(TimeSeries=series, Events=events, targetname=target_name)
Functions
def simulate_from_df(df: pandas.core.frame.DataFrame, eventTypes: [(str, str)], target_name: str)
Parameters:
df: Dataframe with the data to simulate stream, the index has to be of Date type.
eventTypes: Which columns are representing events and of what type example [("column1","isolated"),("column3","configuration")]
target_name: the target name data
return: an iterator
Expand source code
def simulate_from_df(df: pd.DataFrame, eventTypes: [(str, str)], target_name: str):
    '''
    Build a simulated stream from a DataFrame.

    **Parameters**:

    **df**: Dataframe with the data to simulate stream, the index has to be of Date type.

    **eventTypes**: Which columns represent events and of what type, for example [("column1","isolated"),("column3","configuration")]

    **target_name**: the name of the target data

    **return**: an iterator (a simulate_stream instance)
    '''
    events = []
    event_columns = []
    for column, event_type in eventTypes:
        # An event occurs at every index entry where its column equals 1.
        occurrences = list(df[df[column] == 1].index)
        events.append((column, occurrences, event_type))
        event_columns.append(column)

    # Every column that is not an event column is treated as a time series.
    signals = df.drop(event_columns, axis=1) if event_columns else df

    series = [
        (column, signals[column].values, list(signals.index))
        for column in signals.columns
    ]
    return simulate_stream(TimeSeries=series, Events=events, targetname=target_name)
Classes
class simulate_stream (TimeSeries: [(str, list[float], list[pandas._libs.tslibs.timestamps.Timestamp])], Events: [(str, list[pandas._libs.tslibs.timestamps.Timestamp], str)], targetname)
This class simulates the traffic of the data
Parameters:
TimeSeries: A list of time series data in form of (name,list with data, list of timestamp)
Events: A list of events in form of (name, list of timestamps when the event occurred, type of the Event (isolated or configuration)) as expected in ContextGenerator.
Expand source code
class simulate_stream():
    """
    Iterator that replays time series samples and events as one stream of records.

    Every record is a dict with the keys ``name``, ``type``, ``timestamp`` and ``value``.
    Records are ordered by timestamp; among the records that share a timestamp, the one
    belonging to ``targetname`` is always emitted last.
    """

    def __init__(self, TimeSeries: list[tuple[str, list[float], list[pd.Timestamp]]], Events: list[tuple[str, list[pd.Timestamp], str]], targetname):
        """
        This method simulates the traffic of the data

        **Parameters**:

        **TimeSeries**: A list of time series data in form of (name, list with data, list of timestamp)

        **Events**: A list of events in form of (name, list of timestamp when the event occurred, type of the Event (isolated or configuration)) as expected in ContextGenerator.

        **targetname**: Name of the series (or event) whose record is emitted last within each timestamp.
        """
        # Build one long-format table with columns dt, name, value, type.
        dict_df = {
            "dt": [],
            "name": [],
            "value": [],
            "type": [],
        }
        for name, data, times in TimeSeries:
            # zip() stops at the shorter of data/times, so unmatched trailing items are ignored.
            for sample, stamp in zip(data, times):
                dict_df["dt"].append(stamp)
                dict_df["name"].append(name)
                dict_df["value"].append(sample)
                dict_df["type"].append("Univariate")
        for name, times, event_type in Events:
            for stamp in times:
                dict_df["dt"].append(stamp)
                dict_df["name"].append(name)
                dict_df["value"].append(None)  # events carry no measurement
                dict_df["type"].append(event_type)

        self.df = pd.DataFrame(dict_df)
        # A stable sort keeps the insertion order of rows that share a timestamp,
        # so the resulting stream is deterministic.
        self.df.sort_values(by="dt", kind="mergesort", inplace=True)
        # The frame is already sorted, so unique() returns the dates in ascending order.
        self.uniquedates = self.df["dt"].unique()
        self.targetname = targetname
        self.liststream = []
        self.current = 0

        columns = ["dt", "name", "value", "type"]
        # One pass over the timestamp groups; sort=False keeps the (sorted) row order.
        for _, rows in self.df.groupby("dt", sort=False):
            target_record = None
            for stamp, name, value, kind in rows[columns].itertuples(index=False, name=None):
                if name == self.targetname:
                    # Hold the target back so it is appended after every other record
                    # of this timestamp. If the target occurs several times at the
                    # same timestamp only its last row is kept.
                    target_record = {"name": name, "type": kind, "timestamp": stamp, "value": value}
                else:
                    self.liststream.append({
                        "name": name,
                        "type": kind,
                        "timestamp": stamp,
                        # Non-univariate rows (events) are always emitted with value None.
                        "value": value if kind == "Univariate" else None,
                    })
            if target_record is not None:
                self.liststream.append(target_record)

    def __iter__(self):
        """Return the stream itself; iteration continues from the current position."""
        return self

    def __next__(self):
        """Return the next record of the stream, raising StopIteration when it is exhausted."""
        if self.current >= len(self.liststream):
            raise StopIteration
        record = self.liststream[self.current]
        # Advance only after reading, otherwise the first record would be skipped.
        self.current += 1
        return record