Improve formatting.
parent
e4b6dc90e2
commit
9e16d329dd
|
@ -19,7 +19,11 @@ def get_call_data(usernames: List) -> pd.DataFrame:
|
|||
|
||||
def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||
""" Count contacts (callers, senders) and enumerate them by their frequency. """
|
||||
contact_counts = comm_df["trace"].value_counts(sort=True, ascending=False).to_frame(name="frequency")
|
||||
contact_counts = (
|
||||
comm_df["trace"]
|
||||
.value_counts(sort=True, ascending=False)
|
||||
.to_frame(name="frequency")
|
||||
)
|
||||
# A frequency table of different traces (contacts).
|
||||
contact_counts["contact_id"] = list(range(len(contact_counts.index)))
|
||||
contact_code = contact_counts["contact_id"].to_dict()
|
||||
|
|
|
@ -11,26 +11,33 @@ rng = default_rng()
|
|||
|
||||
|
||||
class CallsFeatures(unittest.TestCase):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls) -> None:
|
||||
call_rows = 10
|
||||
callers = np.concatenate((
|
||||
callers = np.concatenate(
|
||||
(
|
||||
np.repeat("caller1", 2),
|
||||
np.repeat("caller2", 3),
|
||||
np.repeat("caller3", 4),
|
||||
np.repeat("caller4", 1)), axis=None)
|
||||
np.repeat("caller4", 1),
|
||||
),
|
||||
axis=None,
|
||||
)
|
||||
rng.shuffle(callers)
|
||||
cls.calls = pd.DataFrame({
|
||||
"id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100,
|
||||
"_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"),
|
||||
"timestamp": np.sort(rng.integers(1612169903000, 1614556703000, size=call_rows)),
|
||||
"device_id": "device1",
|
||||
"call_type": rng.integers(1, 3, size=call_rows),
|
||||
"call_duration": rng.integers(0, 600, size=call_rows),
|
||||
"trace": callers,
|
||||
"participant_id": 29
|
||||
})
|
||||
cls.calls = pd.DataFrame(
|
||||
{
|
||||
"id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100,
|
||||
"_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"),
|
||||
"timestamp": np.sort(
|
||||
rng.integers(1612169903000, 1614556703000, size=call_rows)
|
||||
),
|
||||
"device_id": "device1",
|
||||
"call_type": rng.integers(1, 3, size=call_rows),
|
||||
"call_duration": rng.integers(0, 600, size=call_rows),
|
||||
"trace": callers,
|
||||
"participant_id": 29,
|
||||
}
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def assertSeriesEqual(cls, a, b, msg=None, **optional):
|
||||
|
@ -48,12 +55,13 @@ class CallsFeatures(unittest.TestCase):
|
|||
|
||||
def test_enumeration(self):
|
||||
self.calls["contact_id_manual"] = self.calls["trace"].astype("category")
|
||||
self.calls["contact_id_manual"] = self.calls["contact_id_manual"].cat.rename_categories(
|
||||
{"caller1": 2,
|
||||
"caller2": 1,
|
||||
"caller3": 0,
|
||||
"caller4": 3}
|
||||
self.calls["contact_id_manual"] = self.calls[
|
||||
"contact_id_manual"
|
||||
].cat.rename_categories(
|
||||
{"caller1": 2, "caller2": 1, "caller3": 0, "caller4": 3}
|
||||
)
|
||||
# Enumerate callers manually by their frequency as set in setUpClass.
|
||||
self.calls = enumerate_contacts(self.calls)
|
||||
self.assertSeriesEqual(self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False)
|
||||
self.assertSeriesEqual(
|
||||
self.calls["contact_id_manual"], self.calls["contact_id"], check_names=False
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue