From 9e16d329dd944f6a6e609e12aa9c919e6b2e6bee Mon Sep 17 00:00:00 2001 From: junos Date: Tue, 6 Apr 2021 11:48:37 +0200 Subject: [PATCH] Improve formatting. --- features/communication.py | 6 ++++- test/test_communication.py | 46 ++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/features/communication.py b/features/communication.py index f2453df..eb4d536 100644 --- a/features/communication.py +++ b/features/communication.py @@ -19,7 +19,11 @@ def get_call_data(usernames: List) -> pd.DataFrame: def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: """" Count contacts (callers, senders) and enumerate them by their frequency. """ - contact_counts = comm_df["trace"].value_counts(sort=True, ascending=False).to_frame(name="frequency") + contact_counts = ( + comm_df["trace"] + .value_counts(sort=True, ascending=False) + .to_frame(name="frequency") + ) # A frequency table of different traces (contacts). contact_counts["contact_id"] = list(range(len(contact_counts.index))) contact_code = contact_counts["contact_id"].to_dict() diff --git a/test/test_communication.py b/test/test_communication.py index 0fb69ab..4b2eda1 100644 --- a/test/test_communication.py +++ b/test/test_communication.py @@ -11,26 +11,33 @@ rng = default_rng() class CallsFeatures(unittest.TestCase): - @classmethod def setUpClass(cls) -> None: call_rows = 10 - callers = np.concatenate(( + callers = np.concatenate( + ( np.repeat("caller1", 2), np.repeat("caller2", 3), np.repeat("caller3", 4), - np.repeat("caller4", 1)), axis=None) + np.repeat("caller4", 1), + ), + axis=None, + ) rng.shuffle(callers) - cls.calls = pd.DataFrame({ - "id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100, - "_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"), - "timestamp": np.sort(rng.integers(1612169903000, 1614556703000, size=call_rows)), - "device_id": "device1", - "call_type": rng.integers(1, 3, size=call_rows), - "call_duration": rng.integers(0, 600, size=call_rows), - "trace": callers, - "participant_id": 29 - }) + cls.calls = pd.DataFrame( + { + "id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100, + "_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"), + "timestamp": np.sort( + rng.integers(1612169903000, 1614556703000, size=call_rows) + ), + "device_id": "device1", + "call_type": rng.integers(1, 3, size=call_rows), + "call_duration": rng.integers(0, 600, size=call_rows), + "trace": callers, + "participant_id": 29, + } + ) @classmethod def assertSeriesEqual(cls, a, b, msg=None, **optional): @@ -48,12 +55,13 @@ class CallsFeatures(unittest.TestCase): def test_enumeration(self): self.calls["contact_id_manual"] = self.calls["trace"].astype("category") - self.calls["contact_id_manual"] = self.calls["contact_id_manual"].cat.rename_categories( - {"caller1": 2, - "caller2": 1, - "caller3": 0, - "caller4": 3} + self.calls["contact_id_manual"] = self.calls[ + "contact_id_manual" + ].cat.rename_categories( + {"caller1": 2, "caller2": 1, "caller3": 0, "caller4": 3} ) # Enumerate callers manually by their frequency as set in setUpClass. self.calls = enumerate_contacts(self.calls) - self.assertSeriesEqual(self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False) + self.assertSeriesEqual( + self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False + )