diff --git a/features/communication.py b/features/communication.py index b41e068..87594fb 100644 --- a/features/communication.py +++ b/features/communication.py @@ -8,6 +8,21 @@ from setup import db_engine, session call_types = {1: "incoming", 2: "outgoing", 3: "missed"} sms_types = {1: "received", 2: "sent"} +FEATURES_CALLS = ( + ["no_calls_all"] + + ["no_" + call_type for call_type in call_types.values()] + + ["duration_total_" + call_types.get(1), "duration_total_" + call_types.get(2)] + + ["duration_max_" + call_types.get(1), "duration_max_" + call_types.get(2)] + + ["no_" + call_types.get(1) + "_ratio", "no_" + call_types.get(2) + "_ratio"] + + ["no_contacts"] +) + +# FEATURES_CALLS = ["no_calls_all", +# "no_incoming", "no_outgoing", "no_missed", +# "duration_total_incoming", "duration_total_outgoing", +# "duration_max_incoming", "duration_max_outgoing", +# "no_incoming_ratio", "no_outgoing_ratio", "no_contacts"] + def get_call_data(usernames: Collection) -> pd.DataFrame: """ @@ -203,7 +218,7 @@ def contact_features(comm_df: pd.DataFrame) -> pd.DataFrame: Parameters ---------- - df_enumerated: pd.DataFrame + comm_df: pd.DataFrame A dataframe of calls or SMSes. Returns @@ -216,7 +231,7 @@ def contact_features(comm_df: pd.DataFrame) -> pd.DataFrame: df_enumerated.groupby(["participant_id", "contact_id"]).size().reset_index() ) # Check whether df contains calls or SMS data since some - # features we want to calculate are type-specyfic + # features we want to calculate are type-specific if "call_duration" in df_enumerated: # Add a column with the total duration of calls between two people duration_count = (