Improve formatting.

communication
junos 2021-04-06 11:48:37 +02:00
parent e4b6dc90e2
commit 9e16d329dd
2 changed files with 32 additions and 20 deletions

View File

@@ -19,7 +19,11 @@ def get_call_data(usernames: List) -> pd.DataFrame:
def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
"""" Count contacts (callers, senders) and enumerate them by their frequency. """ """" Count contacts (callers, senders) and enumerate them by their frequency. """
contact_counts = comm_df["trace"].value_counts(sort=True, ascending=False).to_frame(name="frequency") contact_counts = (
comm_df["trace"]
.value_counts(sort=True, ascending=False)
.to_frame(name="frequency")
)
# A frequency table of different traces (contacts). # A frequency table of different traces (contacts).
contact_counts["contact_id"] = list(range(len(contact_counts.index))) contact_counts["contact_id"] = list(range(len(contact_counts.index)))
contact_code = contact_counts["contact_id"].to_dict() contact_code = contact_counts["contact_id"].to_dict()

View File

@@ -11,26 +11,33 @@ rng = default_rng()
class CallsFeatures(unittest.TestCase): class CallsFeatures(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
call_rows = 10 call_rows = 10
callers = np.concatenate(( callers = np.concatenate(
(
np.repeat("caller1", 2), np.repeat("caller1", 2),
np.repeat("caller2", 3), np.repeat("caller2", 3),
np.repeat("caller3", 4), np.repeat("caller3", 4),
np.repeat("caller4", 1)), axis=None) np.repeat("caller4", 1),
),
axis=None,
)
rng.shuffle(callers) rng.shuffle(callers)
cls.calls = pd.DataFrame({ cls.calls = pd.DataFrame(
{
"id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100, "id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100,
"_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"), "_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"),
"timestamp": np.sort(rng.integers(1612169903000, 1614556703000, size=call_rows)), "timestamp": np.sort(
rng.integers(1612169903000, 1614556703000, size=call_rows)
),
"device_id": "device1", "device_id": "device1",
"call_type": rng.integers(1, 3, size=call_rows), "call_type": rng.integers(1, 3, size=call_rows),
"call_duration": rng.integers(0, 600, size=call_rows), "call_duration": rng.integers(0, 600, size=call_rows),
"trace": callers, "trace": callers,
"participant_id": 29 "participant_id": 29,
}) }
)
@classmethod @classmethod
def assertSeriesEqual(cls, a, b, msg=None, **optional): def assertSeriesEqual(cls, a, b, msg=None, **optional):
@@ -48,12 +55,13 @@ class CallsFeatures(unittest.TestCase):
def test_enumeration(self): def test_enumeration(self):
self.calls["contact_id_manual"] = self.calls["trace"].astype("category") self.calls["contact_id_manual"] = self.calls["trace"].astype("category")
self.calls["contact_id_manual"] = self.calls["contact_id_manual"].cat.rename_categories( self.calls["contact_id_manual"] = self.calls[
{"caller1": 2, "contact_id_manual"
"caller2": 1, ].cat.rename_categories(
"caller3": 0, {"caller1": 2, "caller2": 1, "caller3": 0, "caller4": 3}
"caller4": 3}
) )
# Enumerate callers manually by their frequency as set in setUpClass. # Enumerate callers manually by their frequency as set in setUpClass.
self.calls = enumerate_contacts(self.calls) self.calls = enumerate_contacts(self.calls)
self.assertSeriesEqual(self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False) self.assertSeriesEqual(
self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False
)