Compile a list of contact features and add a test.

communication
junos 2021-08-18 15:35:42 +02:00
parent c88336481e
commit 2d78aacd18
2 changed files with 34 additions and 35 deletions

View File

@@ -37,6 +37,14 @@ FEATURES_SMS = (
# "no_received_ratio", "no_sent_ratio", # "no_received_ratio", "no_sent_ratio",
# "no_contacts"] # "no_contacts"]
FEATURES_CONTACT = [
"proportion_calls_all",
"proportion_calls_incoming",
"proportion_calls_outgoing",
"proportion_calls_contacts",
"proportion_calls_missed_sms_received",
]
def get_call_data(usernames: Collection) -> pd.DataFrame: def get_call_data(usernames: Collection) -> pd.DataFrame:
""" """
@@ -287,13 +295,10 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
""" """
count_calls = count_comms(df_calls) count_calls = count_comms(df_calls)
count_sms = count_comms(df_sms) count_sms = count_comms(df_sms)
count_joined = ( count_joined = count_calls.merge(
count_calls.merge(
count_sms, on="participant_id", suffixes=("_calls", "_sms") count_sms, on="participant_id", suffixes=("_calls", "_sms")
) # Merge calls and sms features ).assign( # Merge calls and sms features
.reset_index() # Make participant_id a regular column proportion_calls_all=(
.assign(
proportion_calls=(
lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all) lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all)
), ),
proportion_calls_incoming=( proportion_calls_incoming=(
@@ -306,19 +311,10 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent) lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
), ),
proportion_calls_contacts=( proportion_calls_contacts=(
lambda x: x.no_contacts_calls lambda x: x.no_contacts_calls / (x.no_contacts_calls + x.no_contacts_sms)
/ (x.no_contacts_calls + x.no_contacts_sms)
) )
# Calculate new features and create additional columns # Calculate new features and create additional columns
)[ )[
[ FEATURES_CONTACT
"participant_id",
"proportion_calls",
"proportion_calls_incoming",
"proportion_calls_outgoing",
"proportion_calls_contacts",
"proportion_calls_missed_sms_received",
]
] # Filter out only the relevant features ] # Filter out only the relevant features
)
return count_joined return count_joined

View File

@@ -86,5 +86,8 @@ class CallsFeatures(unittest.TestCase):
def test_calls_sms_features(self): def test_calls_sms_features(self):
self.features_call_sms = calls_sms_features(self.calls, self.sms) self.features_call_sms = calls_sms_features(self.calls, self.sms)
print(self.features_call_sms.columns) print(self.features_call_sms)
self.assertIsInstance(self.features_call_sms, pd.DataFrame) self.assertIsInstance(self.features_call_sms, pd.DataFrame)
self.assertCountEqual(
self.features_call_sms.columns.to_list(), FEATURES_CONTACT
)