diff --git a/features/communication.py b/features/communication.py index a2acfdb..771e08e 100644 --- a/features/communication.py +++ b/features/communication.py @@ -164,8 +164,13 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: .rename(columns=sms_types) .add_prefix("no_") ) - comm_features = comm_counts - # TODO Add ratio of outgoing and incoming texts. + comm_counts["no_all"] = comm_counts.sum(axis=1) + # Add a total count of messages. + comm_features = comm_counts.assign( + no_received_ratio=lambda x: x.no_received / x.no_all, + no_sent_ratio=lambda x: x.no_sent / x.no_all, + ) + # Ratio of incoming and outgoing messages to all messages. else: raise KeyError("The dataframe contains neither call_type or message_type") return comm_features diff --git a/test/test_communication.py b/test/test_communication.py index 6d6d751..a767255 100644 --- a/test/test_communication.py +++ b/test/test_communication.py @@ -24,7 +24,7 @@ class CallsFeatures(unittest.TestCase): axis=None, ) rng.shuffle(callers) - cls.calls = pd.DataFrame( + cls.comm = pd.DataFrame( { "id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100, "_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"), @@ -32,12 +32,17 @@ class CallsFeatures(unittest.TestCase): rng.integers(1612169903000, 1614556703000, size=call_rows) ), "device_id": "device1", - "call_type": rng.integers(1, 3, size=call_rows, endpoint=True), - "call_duration": rng.integers(0, 600, size=call_rows), - "trace": callers, "participant_id": 29, + "trace": callers, } ) + cls.calls = cls.comm.assign( + call_type=rng.integers(1, 3, size=call_rows, endpoint=True), + call_duration=rng.integers(0, 600, size=call_rows), + ) + cls.sms = cls.comm.assign( + message_type=rng.integers(1, 2, size=call_rows, endpoint=True) + ) @classmethod def assertSeriesEqual(cls, a, b, msg=None, **optional): @@ -69,7 +74,12 @@ class CallsFeatures(unittest.TestCase): check_category_order=False, ) - def test_count_comms(self): + def test_count_comms_calls(self): self.features = count_comms(self.calls) print(self.features) self.assertIsInstance(self.features, pd.DataFrame) + + def test_count_comms_sms(self): + self.features = count_comms(self.sms) + print(self.features) + self.assertIsInstance(self.features, pd.DataFrame)