From 1bc996413e2af3c3f4a93bb13c374f9b55c013e7 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 18 Aug 2021 15:23:30 +0200 Subject: [PATCH] Clarify names for no_all calls/sms feature. Add another test. --- features/communication.py | 14 +++++++------- test/test_communication.py | 8 +++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/features/communication.py b/features/communication.py index c5d5056..30275d3 100644 --- a/features/communication.py +++ b/features/communication.py @@ -141,11 +141,11 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .add_prefix("no_") ) # Count calls by type. - comm_counts["no_all"] = comm_counts.sum(axis=1) + comm_counts["no_calls_all"] = comm_counts.sum(axis=1) # Add a total count of calls. comm_counts = comm_counts.assign( - no_incoming_ratio=lambda x: x.no_incoming / x.no_all, - no_outgoing_ratio=lambda x: x.no_outgoing / x.no_all, + no_incoming_ratio=lambda x: x.no_incoming / x.no_calls_all, + no_outgoing_ratio=lambda x: x.no_outgoing / x.no_calls_all, ) # Ratio of incoming and outgoing calls to all calls. comm_duration_total = ( @@ -181,11 +181,11 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .rename(columns=sms_types) .add_prefix("no_") ) - comm_counts["no_all"] = comm_counts.sum(axis=1) + comm_counts["no_sms_all"] = comm_counts.sum(axis=1) # Add a total count of messages. comm_features = comm_counts.assign( - no_received_ratio=lambda x: x.no_received / x.no_all, - no_sent_ratio=lambda x: x.no_sent / x.no_all, + no_received_ratio=lambda x: x.no_received / x.no_sms_all, + no_sent_ratio=lambda x: x.no_sent / x.no_sms_all, ) # Ratio of incoming and outgoing messages to all messages. else: @@ -280,7 +280,7 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF .reset_index() # Make participant_id a regular column .assign( proportion_calls=( - lambda x: x.no_all_calls / (x.no_all_calls + x.no_all_sms) + lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all) ), proportion_calls_incoming=( lambda x: x.no_incoming / (x.no_incoming + x.no_received) diff --git a/test/test_communication.py b/test/test_communication.py index a767255..9e951d2 100644 --- a/test/test_communication.py +++ b/test/test_communication.py @@ -5,7 +5,7 @@ import pandas as pd from numpy.random import default_rng from pandas.testing import assert_series_equal -from features.communication import count_comms, enumerate_contacts, get_call_data +from features.communication import * rng = default_rng() @@ -78,8 +78,14 @@ class CallsFeatures(unittest.TestCase): self.features = count_comms(self.calls) print(self.features) self.assertIsInstance(self.features, pd.DataFrame) + self.assertCountEqual(self.features.columns.to_list(), FEATURES_CALLS) def test_count_comms_sms(self): self.features = count_comms(self.sms) print(self.features) self.assertIsInstance(self.features, pd.DataFrame) + + def test_calls_sms_features(self): + self.features_call_sms = calls_sms_features(self.calls, self.sms) + print(self.features_call_sms.columns) + self.assertIsInstance(self.features_call_sms, pd.DataFrame)