Rename features and add one for missed calls.

communication
junos 2021-08-06 18:51:13 +02:00
parent e3d735163f
commit cca5a29483
1 changed file with 22 additions and 15 deletions

View File

@ -255,14 +255,16 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
df_calls_sms: pd.DataFrame df_calls_sms: pd.DataFrame
The list of features relating calls and sms data for every participant. The list of features relating calls and sms data for every participant.
These are: These are:
* no_calls_no_sms_ratio: * proportion_calls:
proportion of calls in total number of communications proportion of calls in total number of communications
* no_incoming_calls_no_recieved_sms_ratio: * proportion_calls_incoming:
proportion of incoming calls in total number of incoming/received communications proportion of incoming calls in total number of incoming/received communications
* no_outgoing_calls_no_sent_sms_ratio: * proportion_calls_outgoing:
proportion of outgoing calls in total number of outgoing/sent communications proportion of outgoing calls in total number of outgoing/sent communications
* no_calls_contacts_no_sms_contacts_ratio: * proportion_calls_missed_sms_received:
proportion of calls contacts in total number of communication contacts proportion of missed calls in total number of missed calls and received messages
* proportion_calls_contacts:
proportion of calls contacts in total number of communication contacts
""" """
count_calls = count_comms(df_calls) count_calls = count_comms(df_calls)
@ -274,20 +276,25 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
) # Merge calls and sms features ) # Merge calls and sms features
.reset_index() # Make participant_id a regular column .reset_index() # Make participant_id a regular column
.assign( .assign(
no_calls_no_sms_ratio=( proportion_calls=(
lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms) lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms)
), ),
no_incoming_calls_no_recieved_sms_ratio=( proportion_calls_incoming=(
lambda x: x.no_received / (x.no_incoming + x.no_received) lambda x: x.no_incoming / (x.no_incoming + x.no_received)
), ),
no_outgoing_calls_no_sent_sms_ratio=( proportion_calls_missed_sms_received=(
lambda x: x.no_missed / (x.no_missed + x.no_received)
),
proportion_calls_outgoing=(
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent) lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
) # Calculate new features and create additional columns )
# Calculate new features and create additional columns
)[ )[
["participant_id", ["participant_id",
"no_calls_no_sms_ratio", "proportion_calls",
"no_incoming_calls_no_recieved_sms_ratio", "proportion_calls_incoming",
"no_outgoing_calls_no_sent_sms_ratio"] "proportion_calls_outgoing",
"proportion_calls_missed_sms_received"]
] # Filter out only the relevant features ] # Filter out only the relevant features
) )
@ -300,13 +307,13 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
) # Merge calls and sms features ) # Merge calls and sms features
.reset_index() # Make participant_id a regular column .reset_index() # Make participant_id a regular column
.assign( .assign(
no_calls_contacts_no_sms_contacts_ratio=( proportion_calls_contacts=(
lambda x: x.no_contacts_calls / lambda x: x.no_contacts_calls /
(x.no_contacts_calls + x.no_contacts_sms) (x.no_contacts_calls + x.no_contacts_sms)
) # Calculate new features and create additional columns ) # Calculate new features and create additional columns
)[ )[
["participant_id", ["participant_id",
"no_calls_contacts_no_sms_contacts_ratio"] "proportion_calls_contacts"]
] # Filter out only the relevant features ] # Filter out only the relevant features
# Since we are interested only in some features and ignored # Since we are interested only in some features and ignored
# others, a lot of duplicate rows were created. Remove them. # others, a lot of duplicate rows were created. Remove them.