From cca5a294833d6766663afa43b6c1c84c58c4cd50 Mon Sep 17 00:00:00 2001 From: junos Date: Fri, 6 Aug 2021 18:51:13 +0200 Subject: [PATCH] Rename features and add one for missed calls. --- features/communication.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/features/communication.py b/features/communication.py index a3c917c..0c44259 100644 --- a/features/communication.py +++ b/features/communication.py @@ -255,14 +255,16 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF df_calls_sms: pd.DataFrame The list of features relating calls and sms data for every participant. These are: - * no_calls_no_sms_ratio: + * proportion_calls: proportion of calls in total number of communications - * no_incoming_calls_no_recieved_sms_ratio: + * proportion_calls_incoming: proportion of incoming calls in total number of incoming/recieved communications - * no_outgoing_calls_no_sent_sms_ratio: + * proportion_calls_outgoing: proportion of outgoing calls in total number of outgoing/sent communications - * no_calls_contacts_no_sms_contacts_ratio: - proportion of calls contacts in total number of communication contacts + * proportion_calls_missed_sms_received: + proportion of missed calls to the number of received messages + * proportion_calls_contacts: + proportion of calls contacts in total number of communication contacts """ count_calls = count_comms(df_calls) @@ -274,20 +276,25 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF ) # Merge calls and sms features .reset_index() # Make participant_id a regular column .assign( - no_calls_no_sms_ratio=( + proportion_calls=( lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms) ), - no_incoming_calls_no_recieved_sms_ratio=( - lambda x: x.no_received / (x.no_incoming + x.no_received) + proportion_calls_incoming=( + lambda x: x.no_incoming / (x.no_incoming + x.no_received) ), - no_outgoing_calls_no_sent_sms_ratio=( + proportion_calls_missed_sms_received=( + lambda x: x.no_missed / (x.no_missed + x.no_received) + ), + proportion_calls_outgoing=( lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent) - ) # Calculate new features and create additional columns + ) + # Calculate new features and create additional columns )[ ["participant_id", - "no_calls_no_sms_ratio", - "no_incoming_calls_no_recieved_sms_ratio", - "no_outgoing_calls_no_sent_sms_ratio"] + "proportion_calls", + "proportion_calls_incoming", + "proportion_calls_outgoing", + "proportion_calls_missed_sms_received"] ] # Filter out only the relevant feautres ) @@ -300,13 +307,13 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF ) # Merge calls and sms features .reset_index() # Make participand_id a regular column .assign( - no_calls_contacts_no_sms_contacts_ratio=( + proportion_calls_contacts=( lambda x: x.no_contacts_calls / (x.no_contacts_calls + x.no_contacts_sms) ) # Calculate new features and create additional columns )[ ["participant_id", - "no_calls_contacts_no_sms_contacts_ratio"] + "proportion_calls_contacts"] ] # Filter out only the relevant feautres # Since we are interested only in some features and ignored # others, a lot of duplicate rows were created. Remove them.