diff --git a/features/communication.py b/features/communication.py index 2c8b42b..c5d5056 100644 --- a/features/communication.py +++ b/features/communication.py @@ -164,16 +164,8 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .add_prefix("duration_max_") ) # Max call duration by type - comm_contacts_counts = ( - enumerate_contacts(comm_df) - .groupby(["participant_id"]) - .nunique()["contact_id"] - .rename("no_contacts") - ) - # Number of communication contacts comm_features = comm_counts.join(comm_duration_total) comm_features = comm_features.join(comm_duration_max) - comm_features = comm_features.join(comm_contacts_counts) try: comm_features.drop(columns="duration_total_" + call_types[3], inplace=True) comm_features.drop(columns="duration_max_" + call_types[3], inplace=True) @@ -196,16 +188,16 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: no_sent_ratio=lambda x: x.no_sent / x.no_all, ) # Ratio of incoming and outgoing messages to all messages. - comm_contacts_counts = ( - enumerate_contacts(comm_df) - .groupby(["participant_id"]) - .nunique()["contact_id"] - .rename("no_contacts") - ) - # Number of communication contacts - comm_features = comm_features.join(comm_contacts_counts) else: raise KeyError("The dataframe contains neither call_type or message_type") + comm_contacts_counts = ( + enumerate_contacts(comm_df) + .groupby(["participant_id"]) + .nunique()["contact_id"] + .rename("no_contacts") + ) + # Number of communication contacts + comm_features = comm_features.join(comm_contacts_counts) return comm_features