From 2d78aacd18d520706dd8c5b94c09f6986601c481 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 18 Aug 2021 15:35:42 +0200 Subject: [PATCH] Compile a list of contact features and add a test. --- features/communication.py | 64 ++++++++++++++++++-------------------- test/test_communication.py | 5 ++- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/features/communication.py b/features/communication.py index ded410c..0009b16 100644 --- a/features/communication.py +++ b/features/communication.py @@ -37,6 +37,14 @@ FEATURES_SMS = ( # "no_received_ratio", "no_sent_ratio", # "no_contacts"] +FEATURES_CONTACT = [ + "proportion_calls_all", + "proportion_calls_incoming", + "proportion_calls_outgoing", + "proportion_calls_contacts", + "proportion_calls_missed_sms_received", +] + def get_call_data(usernames: Collection) -> pd.DataFrame: """ @@ -287,38 +295,26 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF """ count_calls = count_comms(df_calls) count_sms = count_comms(df_sms) - count_joined = ( - count_calls.merge( - count_sms, on="participant_id", suffixes=("_calls", "_sms") - ) # Merge calls and sms features - .reset_index() # Make participant_id a regular column - .assign( - proportion_calls=( - lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all) - ), - proportion_calls_incoming=( - lambda x: x.no_incoming / (x.no_incoming + x.no_received) - ), - proportion_calls_missed_sms_received=( - lambda x: x.no_missed / (x.no_missed + x.no_received) - ), - proportion_calls_outgoing=( - lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent) - ), - proportion_calls_contacts=( - lambda x: x.no_contacts_calls - / (x.no_contacts_calls + x.no_contacts_sms) - ) - # Calculate new features and create additional columns - )[ - [ - "participant_id", - "proportion_calls", - "proportion_calls_incoming", - "proportion_calls_outgoing", - "proportion_calls_contacts", - "proportion_calls_missed_sms_received", - ] - ] # Filter out only the relevant features - ) + count_joined = count_calls.merge( + count_sms, on="participant_id", suffixes=("_calls", "_sms") + ).assign( # Merge calls and sms features + proportion_calls_all=( + lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all) + ), + proportion_calls_incoming=( + lambda x: x.no_incoming / (x.no_incoming + x.no_received) + ), + proportion_calls_missed_sms_received=( + lambda x: x.no_missed / (x.no_missed + x.no_received) + ), + proportion_calls_outgoing=( + lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent) + ), + proportion_calls_contacts=( + lambda x: x.no_contacts_calls / (x.no_contacts_calls + x.no_contacts_sms) + ) + # Calculate new features and create additional columns + )[ + FEATURES_CONTACT + ] # Filter out only the relevant features return count_joined diff --git a/test/test_communication.py b/test/test_communication.py index bbc0011..67db5cc 100644 --- a/test/test_communication.py +++ b/test/test_communication.py @@ -86,5 +86,8 @@ class CallsFeatures(unittest.TestCase): def test_calls_sms_features(self): self.features_call_sms = calls_sms_features(self.calls, self.sms) - print(self.features_call_sms.columns) + print(self.features_call_sms) self.assertIsInstance(self.features_call_sms, pd.DataFrame) + self.assertCountEqual( + self.features_call_sms.columns.to_list(), FEATURES_CONTACT + )