diff --git a/features/communication.py b/features/communication.py index 66dfa07..69eb319 100644 --- a/features/communication.py +++ b/features/communication.py @@ -88,7 +88,20 @@ def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: + """ + Calculate frequencies (and duration) of messages (or calls), grouped by their types. + Parameters + ---------- + comm_df: pd.DataFrame + A dataframe of calls or SMSes. + + Returns + ------- + comm_features: pd.DataFrame + A list of communication features for every participant. + + """ if "call_type" in comm_df: comm_counts = ( comm_df.value_counts(subset=["participant_id", "call_type"]) @@ -104,6 +117,11 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .add_prefix("duration_") ) comm_features = comm_counts.join(comm_duration) + try: comm_features.drop(columns="duration_" + call_types[3], inplace=True) + # The missed calls are always of 0 duration. + except KeyError: pass + # If there were no missed calls, this exception is raised. + # But we are dropping the column anyway, so no need to deal with the exception. elif "message_type" in comm_df: comm_counts = ( comm_df.value_counts(subset=["participant_id", "message_type"])