93 lines
3.2 KiB
Python
93 lines
3.2 KiB
Python
import unittest
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from numpy.random import default_rng
|
|
from pandas.testing import assert_series_equal
|
|
|
|
from features.communication import *
|
|
|
|
# Module-level NumPy random generator used to build the test fixtures below.
# It is created without a seed, so the generated data differs between runs.
rng = default_rng()
|
|
|
|
|
|
class CallsFeatures(unittest.TestCase):
    """Tests for the call/SMS feature helpers imported from ``features.communication``.

    ``setUpClass`` builds small synthetic tables (``comm``, ``calls``, ``sms``)
    that are shared by every test in the class. Tests should treat them as
    read-only and copy them before adding or changing columns.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the synthetic communication, call and SMS data frames."""
        call_rows = 10
        # Fixed contact frequencies: caller3 (4) > caller2 (3) > caller1 (2) >
        # caller4 (1). test_enumeration relies on this ranking, and the counts
        # must add up to call_rows.
        callers = np.concatenate(
            (
                np.repeat("caller1", 2),
                np.repeat("caller2", 3),
                np.repeat("caller3", 4),
                np.repeat("caller4", 1),
            ),
            axis=None,
        )
        # Randomise the order in which the contacts appear in the table.
        rng.shuffle(callers)
        cls.comm = pd.DataFrame(
            {
                "id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4") + 100,
                "_id": np.linspace(0, call_rows - 1, num=call_rows, dtype="u4"),
                # Sorted random integers; presumably Unix epoch milliseconds
                # (the bounds fall in February 2021) - TODO confirm.
                "timestamp": np.sort(
                    rng.integers(1612169903000, 1614556703000, size=call_rows)
                ),
                "device_id": "device1",
                "participant_id": 29,
                "trace": callers,
            }
        )
        # endpoint=True makes the upper bound inclusive:
        # call_type is drawn from {1, 2, 3}, message_type from {1, 2}.
        cls.calls = cls.comm.assign(
            call_type=rng.integers(1, 3, size=call_rows, endpoint=True),
            call_duration=rng.integers(0, 600, size=call_rows),
        )
        cls.sms = cls.comm.assign(
            message_type=rng.integers(1, 2, size=call_rows, endpoint=True)
        )

    @classmethod
    def assertSeriesEqual(cls, a, b, msg=None, **optional):
        """Fail unless ``a`` and ``b`` are equal according to ``assert_series_equal``.

        Args:
            a: First series.
            b: Second series.
            msg: Optional failure message. When omitted, the message produced
                by pandas is used so that the difference is reported.
            **optional: Extra keyword arguments forwarded to
                ``pandas.testing.assert_series_equal``.

        Raises:
            failureException: If pandas reports the two series as different.
        """
        try:
            assert_series_equal(a, b, **optional)
        except AssertionError as e:
            # Fall back to the pandas diff instead of failing with "None".
            raise cls.failureException(msg if msg is not None else str(e)) from e

    def setUp(self):
        """Make ``assertEqual`` compare two ``pd.Series`` with ``assertSeriesEqual``."""
        # The helper wraps assert_series_equal, so it has to be registered for
        # pd.Series; registering it for pd.DataFrame could never succeed.
        self.addTypeEqualityFunc(pd.Series, self.assertSeriesEqual)

    def test_get_calls_data(self):
        """``get_call_data`` returns something for a known username."""
        # NOTE(review): presumably this queries a real database - confirm.
        calls_from_db = get_call_data(["nokia_0000003"])
        self.assertIsNotNone(calls_from_db)

    def test_enumeration(self):
        """``enumerate_contacts`` assigns the same ids as a manual ranking."""
        # Work on a copy so the extra column does not leak into the class-level
        # fixture that the other tests share.
        calls = self.calls.copy()
        # Enumerate callers manually by their frequency as set in setUpClass.
        calls["contact_id_manual"] = (
            calls["trace"]
            .astype("category")
            .cat.rename_categories(
                {"caller1": 2, "caller2": 1, "caller3": 0, "caller4": 3}
            )
        )
        calls = enumerate_contacts(calls)
        self.assertSeriesEqual(
            calls["contact_id_manual"],
            calls["contact_id"].astype("category"),
            check_names=False,
            check_category_order=False,
        )

    def test_count_comms_calls(self):
        """``count_comms`` on calls yields a frame with the ``FEATURES_CALLS`` columns."""
        features = count_comms(self.calls)
        self.assertIsInstance(features, pd.DataFrame)
        self.assertCountEqual(features.columns.to_list(), FEATURES_CALLS)

    def test_count_comms_sms(self):
        """``count_comms`` on SMS yields a frame with the ``FEATURES_SMS`` columns."""
        features = count_comms(self.sms)
        self.assertIsInstance(features, pd.DataFrame)
        self.assertCountEqual(features.columns.to_list(), FEATURES_SMS)

    def test_calls_sms_features(self):
        """``calls_sms_features`` yields a frame with the ``FEATURES_CALLS_SMS_ALL`` columns."""
        features_call_sms = calls_sms_features(self.calls, self.sms)
        self.assertIsInstance(features_call_sms, pd.DataFrame)
        self.assertCountEqual(
            features_call_sms.columns.to_list(), FEATURES_CALLS_SMS_ALL
        )
|