List calls features.
parent
bb75abcb9b
commit
b1ad8d1309
|
@ -8,6 +8,21 @@ from setup import db_engine, session
|
||||||
call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
|
call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
|
||||||
sms_types = {1: "received", 2: "sent"}
|
sms_types = {1: "received", 2: "sent"}
|
||||||
|
|
||||||
|
FEATURES_CALLS = (
|
||||||
|
["no_calls_all"]
|
||||||
|
+ ["no_" + call_type for call_type in call_types.values()]
|
||||||
|
+ ["duration_total_" + call_types.get(1), "duration_total_" + call_types.get(2)]
|
||||||
|
+ ["duration_max_" + call_types.get(1), "duration_max_" + call_types.get(2)]
|
||||||
|
+ ["no_" + call_types.get(1) + "_ratio", "no_" + call_types.get(2) + "_ratio"]
|
||||||
|
+ ["no_contacts"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# FEATURES_CALLS = ["no_calls_all",
|
||||||
|
# "no_incoming", "no_outgoing", "no_missed",
|
||||||
|
# "duration_total_incoming", "duration_total_outgoing",
|
||||||
|
# "duration_max_incoming", "duration_max_outgoing",
|
||||||
|
# "no_incoming_ratio", "no_outgoing_ratio"]
|
||||||
|
|
||||||
|
|
||||||
def get_call_data(usernames: Collection) -> pd.DataFrame:
|
def get_call_data(usernames: Collection) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
@ -203,7 +218,7 @@ def contact_features(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
df_enumerated: pd.DataFrame
|
comm_df: pd.DataFrame
|
||||||
A dataframe of calls or SMSes.
|
A dataframe of calls or SMSes.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
@ -216,7 +231,7 @@ def contact_features(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
df_enumerated.groupby(["participant_id", "contact_id"]).size().reset_index()
|
df_enumerated.groupby(["participant_id", "contact_id"]).size().reset_index()
|
||||||
)
|
)
|
||||||
# Check whether df contains calls or SMS data since some
|
# Check whether df contains calls or SMS data since some
|
||||||
# features we want to calculate are type-specyfic
|
# features we want to calculate are type-specific
|
||||||
if "call_duration" in df_enumerated:
|
if "call_duration" in df_enumerated:
|
||||||
# Add a column with the total duration of calls between two people
|
# Add a column with the total duration of calls between two people
|
||||||
duration_count = (
|
duration_count = (
|
||||||
|
|
Loading…
Reference in New Issue