Rename features and add one for missed calls.
parent
e3d735163f
commit
cca5a29483
|
@ -255,13 +255,15 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
|
||||||
df_calls_sms: pd.DataFrame
|
df_calls_sms: pd.DataFrame
|
||||||
The list of features relating calls and sms data for every participant.
|
The list of features relating calls and sms data for every participant.
|
||||||
These are:
|
These are:
|
||||||
* no_calls_no_sms_ratio:
|
* proportion_calls:
|
||||||
proportion of calls in total number of communications
|
proportion of calls in total number of communications
|
||||||
* no_incoming_calls_no_recieved_sms_ratio:
|
* proportion_calls_incoming:
|
||||||
proportion of incoming calls in total number of incoming/received communications
|
proportion of incoming calls in total number of incoming/received communications
|
||||||
* no_outgoing_calls_no_sent_sms_ratio:
|
* proportion_calls_outgoing:
|
||||||
proportion of outgoing calls in total number of outgoing/sent communications
|
proportion of outgoing calls in total number of outgoing/sent communications
|
||||||
* no_calls_contacts_no_sms_contacts_ratio:
|
* proportion_calls_missed_sms_received:
|
||||||
|
proportion of missed calls in total number of missed calls and received messages
|
||||||
|
* proportion_calls_contacts:
|
||||||
proportion of calls contacts in total number of communication contacts
|
proportion of calls contacts in total number of communication contacts
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -274,20 +276,25 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
|
||||||
) # Merge calls and sms features
|
) # Merge calls and sms features
|
||||||
.reset_index() # Make participant_id a regular column
|
.reset_index() # Make participant_id a regular column
|
||||||
.assign(
|
.assign(
|
||||||
no_calls_no_sms_ratio=(
|
proportion_calls=(
|
||||||
lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms)
|
lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms)
|
||||||
),
|
),
|
||||||
no_incoming_calls_no_recieved_sms_ratio=(
|
proportion_calls_incoming=(
|
||||||
lambda x: x.no_received / (x.no_incoming + x.no_received)
|
lambda x: x.no_incoming / (x.no_incoming + x.no_received)
|
||||||
),
|
),
|
||||||
no_outgoing_calls_no_sent_sms_ratio=(
|
proportion_calls_missed_sms_received=(
|
||||||
|
lambda x: x.no_missed / (x.no_missed + x.no_received)
|
||||||
|
),
|
||||||
|
proportion_calls_outgoing=(
|
||||||
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
||||||
) # Calculate new features and create additional columns
|
)
|
||||||
|
# Calculate new features and create additional columns
|
||||||
)[
|
)[
|
||||||
["participant_id",
|
["participant_id",
|
||||||
"no_calls_no_sms_ratio",
|
"proportion_calls",
|
||||||
"no_incoming_calls_no_recieved_sms_ratio",
|
"proportion_calls_incoming",
|
||||||
"no_outgoing_calls_no_sent_sms_ratio"]
|
"proportion_calls_outgoing",
|
||||||
|
"proportion_calls_missed_sms_received"]
|
||||||
] # Filter out only the relevant features
|
] # Filter out only the relevant features
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -300,13 +307,13 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
|
||||||
) # Merge calls and sms features
|
) # Merge calls and sms features
|
||||||
.reset_index() # Make participant_id a regular column
|
.reset_index() # Make participant_id a regular column
|
||||||
.assign(
|
.assign(
|
||||||
no_calls_contacts_no_sms_contacts_ratio=(
|
proportion_calls_contacts=(
|
||||||
lambda x: x.no_contacts_calls /
|
lambda x: x.no_contacts_calls /
|
||||||
(x.no_contacts_calls + x.no_contacts_sms)
|
(x.no_contacts_calls + x.no_contacts_sms)
|
||||||
) # Calculate new features and create additional columns
|
) # Calculate new features and create additional columns
|
||||||
)[
|
)[
|
||||||
["participant_id",
|
["participant_id",
|
||||||
"no_calls_contacts_no_sms_contacts_ratio"]
|
"proportion_calls_contacts"]
|
||||||
] # Filter out only the relevant features
|
] # Filter out only the relevant features
|
||||||
# Since we are interested only in some features and ignored
|
# Since we are interested only in some features and ignored
|
||||||
# others, a lot of duplicate rows were created. Remove them.
|
# others, a lot of duplicate rows were created. Remove them.
|
||||||
|
|
Loading…
Reference in New Issue