From 414b30b7e1a5bfc65ec37f34724cc718e0ef6359 Mon Sep 17 00:00:00 2001 From: junos Date: Tue, 6 Apr 2021 16:54:08 +0200 Subject: [PATCH] Remove missed call duration and document. --- features/communication.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/features/communication.py b/features/communication.py index 66dfa07..69eb319 100644 --- a/features/communication.py +++ b/features/communication.py @@ -88,7 +88,20 @@ def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: + """ + Calculate frequencies (and duration) of messages (or calls), grouped by their types. + Parameters + ---------- + comm_df: pd.DataFrame + A dataframe of calls or SMSes. + + Returns + ------- + comm_features: pd.DataFrame + A list of communication features for every participant. + + """ if "call_type" in comm_df: comm_counts = ( comm_df.value_counts(subset=["participant_id", "call_type"]) @@ -104,6 +117,11 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .add_prefix("duration_") ) comm_features = comm_counts.join(comm_duration) + try: comm_features.drop(columns="duration_" + call_types[3], inplace=True) + # The missed calls are always of 0 duration. + except KeyError: pass + # If there were no missed calls, this exception is raised. + # But we are dropping the column anyway, so no need to deal with the exception. elif "message_type" in comm_df: comm_counts = ( comm_df.value_counts(subset=["participant_id", "message_type"])