From aed73bb7ed0adb4da4f05aadfa30cae95460b7bd Mon Sep 17 00:00:00 2001 From: junos Date: Sat, 21 Aug 2021 18:12:02 +0200 Subject: [PATCH] Add fill values for communication for rows with no calls/smses. --- features/communication.py | 51 +++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/features/communication.py b/features/communication.py index d92bd29..e9d5c37 100644 --- a/features/communication.py +++ b/features/communication.py @@ -8,14 +8,21 @@ from setup import db_engine, session call_types = {1: "incoming", 2: "outgoing", 3: "missed"} sms_types = {1: "received", 2: "sent"} -FEATURES_CALLS = ( - ["no_calls_all"] - + ["no_" + call_type for call_type in call_types.values()] - + ["duration_total_" + call_types.get(1), "duration_total_" + call_types.get(2)] - + ["duration_max_" + call_types.get(1), "duration_max_" + call_types.get(2)] - + ["no_" + call_types.get(1) + "_ratio", "no_" + call_types.get(2) + "_ratio"] - + ["no_contacts_calls"] -) +FILL_NA_CALLS = { + "no_calls_all": 0, + "no_" + call_types.get(1): 0, + "no_" + call_types.get(2): 0, + "no_" + call_types.get(3): 0, + "duration_total_" + call_types.get(1): 0, + "duration_total_" + call_types.get(2): 0, + "duration_max_" + call_types.get(1): 0, + "duration_max_" + call_types.get(2): 0, + "no_" + call_types.get(1) + "_ratio": 1 / 3, # Three different types + "no_" + call_types.get(2) + "_ratio": 1 / 3, + "no_contacts_calls": 0, +} + +FEATURES_CALLS = list(FILL_NA_CALLS.keys()) # FEATURES_CALLS = # ["no_calls_all", @@ -23,19 +30,24 @@ FEATURES_CALLS = ( # "duration_total_incoming", "duration_total_outgoing", # "duration_max_incoming", "duration_max_outgoing", # "no_incoming_ratio", "no_outgoing_ratio", -# "no_contacts"] +# "no_contacts_calls"] + +FILL_NA_SMS = { + "no_sms_all": 0, + "no_" + sms_types.get(1): 0, + "no_" + sms_types.get(2): 0, + "no_" + sms_types.get(1) + "_ratio": 1 / 2, # Two different types + "no_" + sms_types.get(2) + "_ratio": 1 / 2, + 
"no_contacts_sms": 0, +} + +FEATURES_SMS = list(FILL_NA_SMS.keys()) -FEATURES_SMS = ( - ["no_sms_all"] - + ["no_" + sms_type for sms_type in sms_types.values()] - + ["no_" + sms_types.get(1) + "_ratio", "no_" + sms_types.get(2) + "_ratio"] - + ["no_contacts_sms"] -) # FEATURES_SMS = # ["no_sms_all", # "no_received", "no_sent", # "no_received_ratio", "no_sent_ratio", -# "no_contacts"] +# "no_contacts_sms"] FEATURES_CALLS_SMS_PROP = [ "proportion_calls_all", @@ -45,8 +57,15 @@ FEATURES_CALLS_SMS_PROP = [ "proportion_calls_missed_sms_received", ] +FILL_NA_CALLS_SMS_PROP = { + key: 1 / 2 for key in FEATURES_CALLS_SMS_PROP +} # All are of the form a / (a + b). + FEATURES_CALLS_SMS_ALL = FEATURES_CALLS + FEATURES_SMS + FEATURES_CALLS_SMS_PROP +FILL_NA_CALLS_SMS_ALL = FILL_NA_CALLS | FILL_NA_SMS | FILL_NA_CALLS_SMS_PROP +# The dict union operator (|) was introduced by PEP 584 and requires Python 3.9+. + def get_call_data(usernames: Collection) -> pd.DataFrame: """