import pandas as pd
from tables import (
    model_to_dataframe, process_main_data, process_inclusion_criteria, process_source_disclosure_data,
    process_incident_type_data, process_receivers_data, process_attributions_data, process_attributions_bases_data,
    process_attributions_types_data, process_attribution_countries_data, process_attribution_actors_data,
    process_attribution_companies_data, process_attribution_legal_references_data, process_initiators_data,
    process_initiators_categories_data, process_cyber_conflict_issues_data, process_offline_conflict_issues_data,
    process_offline_conflict_intensities_data, process_political_responses_data, process_political_responses_type_data,
    process_technical_codings_data, process_cyber_intensity_data, process_mitre_initial_access_data,
    process_mitre_impact_data, process_impact_indicator_data, process_legal_codings_data,
    process_il_breach_indicator_data, process_legal_responses_data, process_legal_responses_type_data,
    process_source_urls_data, process_source_names_data, process_sources_attributions_data, clean_initiators)
class IncidentDataFrames:
"""
A class used for returning pandas DataFrames for specific variables/aspects of the incident to enable analysis.
The different dfs can subsequently be joined through the `incident_id` column. Refer to the EuRepoC Codebook
for more information about the substantive meaning of each variable (i.e. df column).
Parameters:
incident_data (list): List of incident data dictionaries, retrieved by using the `get_data()` method of the `IncidentDataFrames` class.
Note
----------
The data within the returned dfs are "exploded" over multiple rows for variables that contain multiple
possible values. When performing analyses with pandas counting numbers of incidents, remember to use
the `nunique()` method instead of the `count()` method.
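
    Example
    -------
    A minimal usage sketch (assumes `incident_data` has already been retrieved as a list of dictionaries)::

        dfs = IncidentDataFrames(incident_data=incident_data)
        main_df = dfs.main_data()
        receivers_df = dfs.receivers()
        joined = main_df.merge(receivers_df, on="incident_id", how="left")
        # rows are exploded, so count distinct incident ids rather than rows
        nr_incidents = joined["incident_id"].nunique()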
"""
    def __init__(self, incident_data=None):
        self.data = incident_data
    def main_data(self):
        """Columns: incident_id, name, description, added_to_db, start_date, end_date, operation_type, status, updated_at,
        number_attributions, number_political_responses, number_legal_responses, casualties"""
        return model_to_dataframe(process_main_data(self.data))
    def inclusion_criteria(self):
        """Columns: incident_id, inclusion_criterion, inclusion_criterion_subcode"""
        return model_to_dataframe(process_inclusion_criteria(self.data))
    def source_of_disclosure(self):
        """Columns: incident_id, source_disclosure"""
        return model_to_dataframe(process_source_disclosure_data(self.data))
    def operation_types(self):
        """Columns: incident_id, operation_type"""
        # Built directly from the cleaned incident type on each record, rather
        # than through a processing helper from `tables`.
        operation_types = []
        for incident in self.data:
            incident_dict = {
                'incident_id': incident['id'],
                'operation_type': incident['incident_type_clean'],
            }
            operation_types.append(incident_dict)
        return pd.DataFrame(operation_types)
    def incident_types(self):
        """Columns: incident_id, incident_type"""
        return model_to_dataframe(process_incident_type_data(self.data))
    def receivers(self):
        """Columns: receiver_id, incident_id, receiver_name, receiver_country, receiver_country_alpha_2_code, receiver_country_alpha_3_code,
        receiver_category, receiver_subcategory, receiver_regions (this is a list)"""
        return model_to_dataframe(process_receivers_data(self.data))
    def attributions(self):
        """Columns: attribution_id, incident_id, settled_attribution, attribution_date, attribution_updated_at,
        attribution_created_at, attribution_basis, attribution_type, attribution_subtype, attributing_country, attributing_actor,
        attributing_company, attribution_legal_reference, attribution_legal_reference_subcode, initiator_id,
        initiator_name, initiator_country, initiator_category, initiator_subcategory"""
        attributions_main = model_to_dataframe(process_attributions_data(self.data))
        attribution_bases = model_to_dataframe(process_attributions_bases_data(self.data))
        attribution_types = model_to_dataframe(process_attributions_types_data(self.data))
        attributing_country = model_to_dataframe(process_attribution_countries_data(self.data))
        attributing_actors = model_to_dataframe(process_attribution_actors_data(self.data))
        attributing_companies = model_to_dataframe(process_attribution_companies_data(self.data))
        attribution_legal_ref = model_to_dataframe(process_attribution_legal_references_data(self.data))
        # Merge the attribution sub-tables on `attribution_id` into one wide DataFrame
        attributions_df = attributions_main.merge(attribution_bases, on='attribution_id', how='outer')
        attributions_df = attributions_df.merge(attribution_types, on='attribution_id', how='outer')
        attributions_df = attributions_df.merge(attributing_country, on='attribution_id', how='outer')
        attributions_df = attributions_df.merge(attributing_actors, on='attribution_id', how='outer')
        attributions_df = attributions_df.merge(attributing_companies, on='attribution_id', how='outer')
        attributions_df = attributions_df.merge(attribution_legal_ref, on='attribution_id', how='outer')
        # Attach the initiator data and drop the duplicated columns produced by the merge
        initiators_data = self.initiators(settled=False)
        attributions_df = attributions_df.merge(initiators_data, on='attribution_id', how='outer', suffixes=('', '_drop'))
        attributions_df = attributions_df.drop(columns=[col for col in attributions_df if col.endswith('_drop')])
        return attributions_df.drop_duplicates()
    def initiators(self, settled=True):
        """Columns: initiator_id, incident_id, attribution_id, settled_initiator, initiator_name,
        initiator_country, initiator_category, initiator_subcategory"""
        initiator_main = model_to_dataframe(process_initiators_data(self.data))
        initiator_categories = model_to_dataframe(process_initiators_categories_data(self.data))
        initiators_df = initiator_main.merge(initiator_categories, on='initiator_id', how='outer')
        return clean_initiators(initiators_df.drop_duplicates(), settled=settled)
    def cyber_conflict_issues(self):
        """Columns: incident_id, cyber_conflict_issue"""
        return model_to_dataframe(process_cyber_conflict_issues_data(self.data))
    def offline_conflicts(self):
        """Columns: incident_id, offline_conflict_issue, offline_conflict_name, offline_conflict_intensity,
        offline_conflict_intensity_subcode"""
        issues = model_to_dataframe(process_offline_conflict_issues_data(self.data))
        conflict_intensity = model_to_dataframe(process_offline_conflict_intensities_data(self.data))
        conflicts_df = issues.merge(conflict_intensity, on='incident_id', how='outer')
        return conflicts_df.drop_duplicates()
    def political_responses(self):
        """Columns: political_response_id, incident_id, political_response_date, political_response_responding_country,
        political_response_responding_actor, political_response_type, political_response_subtype"""
        pol_responses_main = model_to_dataframe(process_political_responses_data(self.data))
        pol_responses_type = model_to_dataframe(process_political_responses_type_data(self.data))
        pol_responses_df = pol_responses_main.merge(
            pol_responses_type,
            on='political_response_id',
            how='outer',
            suffixes=('', '_drop')
        )
        # Drop the duplicated columns produced by the merge
        cleaned_df = pol_responses_df.drop_duplicates().drop(columns=[col for col in pol_responses_df if col.endswith('_drop')])
        return cleaned_df
    def technical_variables(self):
        """Columns: incident_id, zero_days, zero_days_subcode, has_disruption, user_interaction"""
        return model_to_dataframe(process_technical_codings_data(self.data))
    def cyber_intensity(self):
        """Columns: incident_id, disruption, hijacking, data_theft, physical_effects_spatial, physical_effects_temporal,
        target_multiplier, unweighted_intensity, weighted_intensity"""
        return model_to_dataframe(process_cyber_intensity_data(self.data))
    def mitre_initial_access(self):
        """Columns: incident_id, mitre_initial_access"""
        return model_to_dataframe(process_mitre_initial_access_data(self.data))
    def mitre_impact(self):
        """Columns: incident_id, mitre_impact"""
        return model_to_dataframe(process_mitre_impact_data(self.data))
    def impact_indicator(self):
        """Columns: incident_id, impact_indicator_score, impact_indicator_label, functional_impact, intelligence_impact,
        economic_impact, economic_impact_value, economic_impact_currency, affected_entities, affected_entities_value,
        affected_eu_countries, affected_eu_countries_value, affected_third_countries, affected_third_countries_value"""
        return model_to_dataframe(process_impact_indicator_data(self.data))
    def legal_variables(self):
        """Columns: incident_id, state_responsibility_actor, evidence_for_sanctions_indicator, response_indicator"""
        return model_to_dataframe(process_legal_codings_data(self.data))
    def il_breach_indicator(self):
        """Columns: incident_id, il_breach_indicator, il_breach_indicator_subcode"""
        return model_to_dataframe(process_il_breach_indicator_data(self.data))
    def legal_responses(self):
        """Columns: legal_response_id, incident_id, legal_response_date, legal_response_responding_country,
        legal_response_responding_actor, legal_response_type, legal_response_subtype"""
        leg_responses_main = model_to_dataframe(process_legal_responses_data(self.data))
        leg_responses_type = model_to_dataframe(process_legal_responses_type_data(self.data))
        leg_responses_df = leg_responses_main.merge(
            leg_responses_type,
            on='legal_response_id',
            how='outer',
            suffixes=('', '_drop')
        )
        # Drop the duplicated columns produced by the merge
        cleaned_df = leg_responses_df.drop_duplicates().drop(columns=[col for col in leg_responses_df if col.endswith('_drop')])
        return cleaned_df
    def sources_urls(self):
        """Columns: source_urls_id, incident_id, source_url"""
        return model_to_dataframe(process_source_urls_data(self.data))
    def source_names(self):
        """Columns: source_url_id, incident_id, source_name"""
        return model_to_dataframe(process_source_names_data(self.data))
    def attribution_sources(self):
        """Columns: incident_id, attribution_source_url"""
        return model_to_dataframe(process_sources_attributions_data(self.data))
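

# A minimal, hypothetical usage sketch (not part of the class above). It assumes the
# incident data has already been retrieved as a list of dictionaries and saved to a
# local JSON file; the file name and variable names are placeholders.
if __name__ == "__main__":
    import json

    with open("incident_data.json") as f:  # placeholder path
        incident_data = json.load(f)

    dfs = IncidentDataFrames(incident_data)

    # The returned DataFrames share the `incident_id` column, so they can be joined
    # freely. Because rows are exploded, count incidents with nunique(), not count().
    main_df = dfs.main_data()
    receivers_df = dfs.receivers()
    joined = main_df.merge(receivers_df, on="incident_id", how="left")
    print(joined.groupby("receiver_category")["incident_id"].nunique())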