Compile a list of contact features and add a test.
parent
c88336481e
commit
2d78aacd18
|
@ -37,6 +37,14 @@ FEATURES_SMS = (
|
||||||
# "no_received_ratio", "no_sent_ratio",
|
# "no_received_ratio", "no_sent_ratio",
|
||||||
# "no_contacts"]
|
# "no_contacts"]
|
||||||
|
|
||||||
|
FEATURES_CONTACT = [
|
||||||
|
"proportion_calls_all",
|
||||||
|
"proportion_calls_incoming",
|
||||||
|
"proportion_calls_outgoing",
|
||||||
|
"proportion_calls_contacts",
|
||||||
|
"proportion_calls_missed_sms_received",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_call_data(usernames: Collection) -> pd.DataFrame:
|
def get_call_data(usernames: Collection) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
@ -287,13 +295,10 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
|
||||||
"""
|
"""
|
||||||
count_calls = count_comms(df_calls)
|
count_calls = count_comms(df_calls)
|
||||||
count_sms = count_comms(df_sms)
|
count_sms = count_comms(df_sms)
|
||||||
count_joined = (
|
count_joined = count_calls.merge(
|
||||||
count_calls.merge(
|
|
||||||
count_sms, on="participant_id", suffixes=("_calls", "_sms")
|
count_sms, on="participant_id", suffixes=("_calls", "_sms")
|
||||||
) # Merge calls and sms features
|
).assign( # Merge calls and sms features
|
||||||
.reset_index() # Make participant_id a regular column
|
proportion_calls_all=(
|
||||||
.assign(
|
|
||||||
proportion_calls=(
|
|
||||||
lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all)
|
lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all)
|
||||||
),
|
),
|
||||||
proportion_calls_incoming=(
|
proportion_calls_incoming=(
|
||||||
|
@ -306,19 +311,10 @@ def calls_sms_features(df_calls: pd.DataFrame, df_sms: pd.DataFrame) -> pd.DataF
|
||||||
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
||||||
),
|
),
|
||||||
proportion_calls_contacts=(
|
proportion_calls_contacts=(
|
||||||
lambda x: x.no_contacts_calls
|
lambda x: x.no_contacts_calls / (x.no_contacts_calls + x.no_contacts_sms)
|
||||||
/ (x.no_contacts_calls + x.no_contacts_sms)
|
|
||||||
)
|
)
|
||||||
# Calculate new features and create additional columns
|
# Calculate new features and create additional columns
|
||||||
)[
|
)[
|
||||||
[
|
FEATURES_CONTACT
|
||||||
"participant_id",
|
|
||||||
"proportion_calls",
|
|
||||||
"proportion_calls_incoming",
|
|
||||||
"proportion_calls_outgoing",
|
|
||||||
"proportion_calls_contacts",
|
|
||||||
"proportion_calls_missed_sms_received",
|
|
||||||
]
|
|
||||||
] # Filter out only the relevant features
|
] # Filter out only the relevant features
|
||||||
)
|
|
||||||
return count_joined
|
return count_joined
|
||||||
|
|
|
@ -86,5 +86,8 @@ class CallsFeatures(unittest.TestCase):
|
||||||
|
|
||||||
def test_calls_sms_features(self):
|
def test_calls_sms_features(self):
|
||||||
self.features_call_sms = calls_sms_features(self.calls, self.sms)
|
self.features_call_sms = calls_sms_features(self.calls, self.sms)
|
||||||
print(self.features_call_sms.columns)
|
print(self.features_call_sms)
|
||||||
self.assertIsInstance(self.features_call_sms, pd.DataFrame)
|
self.assertIsInstance(self.features_call_sms, pd.DataFrame)
|
||||||
|
self.assertCountEqual(
|
||||||
|
self.features_call_sms.columns.to_list(), FEATURES_CONTACT
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in New Issue