Add fill values for communication for rows with no calls/smses.

rapids
junos 2021-08-21 18:12:02 +02:00
parent 8507ff5761
commit aed73bb7ed
1 changed files with 35 additions and 16 deletions

View File

@ -8,14 +8,21 @@ from setup import db_engine, session
call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
sms_types = {1: "received", 2: "sent"}
FEATURES_CALLS = (
["no_calls_all"]
+ ["no_" + call_type for call_type in call_types.values()]
+ ["duration_total_" + call_types.get(1), "duration_total_" + call_types.get(2)]
+ ["duration_max_" + call_types.get(1), "duration_max_" + call_types.get(2)]
+ ["no_" + call_types.get(1) + "_ratio", "no_" + call_types.get(2) + "_ratio"]
+ ["no_contacts_calls"]
)
FILL_NA_CALLS = {
"no_calls_all": 0,
"no_" + call_types.get(1): 0,
"no_" + call_types.get(2): 0,
"no_" + call_types.get(3): 0,
"duration_total_" + call_types.get(1): 0,
"duration_total_" + call_types.get(2): 0,
"duration_max_" + call_types.get(1): 0,
"duration_max_" + call_types.get(2): 0,
"no_" + call_types.get(1) + "_ratio": 1 / 3, # Three different types
"no_" + call_types.get(2) + "_ratio": 1 / 3,
"no_contacts_calls": 0,
}
FEATURES_CALLS = list(FILL_NA_CALLS.keys())
# FEATURES_CALLS =
# ["no_calls_all",
@ -23,19 +30,24 @@ FEATURES_CALLS = (
# "duration_total_incoming", "duration_total_outgoing",
# "duration_max_incoming", "duration_max_outgoing",
# "no_incoming_ratio", "no_outgoing_ratio",
# "no_contacts"]
# "no_contacts_calls"]
FILL_NA_SMS = {
"no_sms_all": 0,
"no_" + sms_types.get(1): 0,
"no_" + sms_types.get(2): 0,
"no_" + sms_types.get(1) + "_ratio": 1 / 2, # Two different types
"no_" + sms_types.get(2) + "_ratio": 1 / 2,
"no_contacts_sms": 0,
}
FEATURES_SMS = list(FILL_NA_SMS.keys())
FEATURES_SMS = (
["no_sms_all"]
+ ["no_" + sms_type for sms_type in sms_types.values()]
+ ["no_" + sms_types.get(1) + "_ratio", "no_" + sms_types.get(2) + "_ratio"]
+ ["no_contacts_sms"]
)
# FEATURES_SMS =
# ["no_sms_all",
# "no_received", "no_sent",
# "no_received_ratio", "no_sent_ratio",
# "no_contacts"]
# "no_contacts_sms"]
FEATURES_CALLS_SMS_PROP = [
"proportion_calls_all",
@ -45,8 +57,15 @@ FEATURES_CALLS_SMS_PROP = [
"proportion_calls_missed_sms_received",
]
FILL_NA_CALLS_SMS_PROP = {
key: 1 / 2 for key in FEATURES_CALLS_SMS_PROP
} # All of the form of a / (a + b).
FEATURES_CALLS_SMS_ALL = FEATURES_CALLS + FEATURES_SMS + FEATURES_CALLS_SMS_PROP
FILL_NA_CALLS_SMS_ALL = FILL_NA_CALLS | FILL_NA_SMS | FILL_NA_CALLS_SMS_PROP
# As per PEP-584 a union for dicts was implemented in Python 3.9.0.
def get_call_data(usernames: Collection) -> pd.DataFrame:
"""