Merge branch 'master' into ml_pipeline
commit
0b85ee8fdc
|
@ -166,7 +166,7 @@ def count_comms(comm_df: pd.DataFrame, group_by=None) -> pd.DataFrame:
|
||||||
data_type = "calls"
|
data_type = "calls"
|
||||||
comm_counts = (
|
comm_counts = (
|
||||||
comm_df.value_counts(subset=group_by + ["participant_id", "call_type"])
|
comm_df.value_counts(subset=group_by + ["participant_id", "call_type"])
|
||||||
.unstack()
|
.unstack(level="call_type", fill_value=0)
|
||||||
.rename(columns=call_types)
|
.rename(columns=call_types)
|
||||||
.add_prefix("no_")
|
.add_prefix("no_")
|
||||||
)
|
)
|
||||||
|
@ -181,7 +181,7 @@ def count_comms(comm_df: pd.DataFrame, group_by=None) -> pd.DataFrame:
|
||||||
comm_duration_total = (
|
comm_duration_total = (
|
||||||
comm_df.groupby(group_by + ["participant_id", "call_type"])
|
comm_df.groupby(group_by + ["participant_id", "call_type"])
|
||||||
.sum()["call_duration"]
|
.sum()["call_duration"]
|
||||||
.unstack()
|
.unstack(level="call_type", fill_value=0)
|
||||||
.rename(columns=call_types)
|
.rename(columns=call_types)
|
||||||
.add_prefix("duration_total_")
|
.add_prefix("duration_total_")
|
||||||
)
|
)
|
||||||
|
@ -189,7 +189,7 @@ def count_comms(comm_df: pd.DataFrame, group_by=None) -> pd.DataFrame:
|
||||||
comm_duration_max = (
|
comm_duration_max = (
|
||||||
comm_df.groupby(group_by + ["participant_id", "call_type"])
|
comm_df.groupby(group_by + ["participant_id", "call_type"])
|
||||||
.max()["call_duration"]
|
.max()["call_duration"]
|
||||||
.unstack()
|
.unstack(level="call_type", fill_value=0)
|
||||||
.rename(columns=call_types)
|
.rename(columns=call_types)
|
||||||
.add_prefix("duration_max_")
|
.add_prefix("duration_max_")
|
||||||
)
|
)
|
||||||
|
@ -208,7 +208,7 @@ def count_comms(comm_df: pd.DataFrame, group_by=None) -> pd.DataFrame:
|
||||||
data_type = "sms"
|
data_type = "sms"
|
||||||
comm_counts = (
|
comm_counts = (
|
||||||
comm_df.value_counts(subset=group_by + ["participant_id", "message_type"])
|
comm_df.value_counts(subset=group_by + ["participant_id", "message_type"])
|
||||||
.unstack()
|
.unstack(level="message_type", fill_value=0)
|
||||||
.rename(columns=sms_types)
|
.rename(columns=sms_types)
|
||||||
.add_prefix("no_")
|
.add_prefix("no_")
|
||||||
)
|
)
|
||||||
|
@ -311,9 +311,16 @@ def calls_sms_features(
|
||||||
group_by = []
|
group_by = []
|
||||||
count_calls = count_comms(df_calls, group_by)
|
count_calls = count_comms(df_calls, group_by)
|
||||||
count_sms = count_comms(df_sms, group_by)
|
count_sms = count_comms(df_sms, group_by)
|
||||||
count_joined = count_calls.merge(
|
count_joined = (
|
||||||
count_sms, how="outer", left_index=True, right_index=True, validate="one_to_one"
|
count_calls.merge(
|
||||||
).assign(
|
count_sms,
|
||||||
|
how="outer",
|
||||||
|
left_index=True,
|
||||||
|
right_index=True,
|
||||||
|
validate="one_to_one",
|
||||||
|
)
|
||||||
|
.fillna(0, downcast="infer")
|
||||||
|
.assign(
|
||||||
proportion_calls_all=(
|
proportion_calls_all=(
|
||||||
lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all)
|
lambda x: x.no_calls_all / (x.no_calls_all + x.no_sms_all)
|
||||||
),
|
),
|
||||||
|
@ -327,8 +334,11 @@ def calls_sms_features(
|
||||||
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
lambda x: x.no_outgoing / (x.no_outgoing + x.no_sent)
|
||||||
),
|
),
|
||||||
proportion_calls_contacts=(
|
proportion_calls_contacts=(
|
||||||
lambda x: x.no_contacts_calls / (x.no_contacts_calls + x.no_contacts_sms)
|
lambda x: x.no_contacts_calls
|
||||||
|
/ (x.no_contacts_calls + x.no_contacts_sms)
|
||||||
)
|
)
|
||||||
# Calculate new features and create additional columns
|
# Calculate new features and create additional columns
|
||||||
)
|
)
|
||||||
|
.fillna(0.5, downcast="infer")
|
||||||
|
)
|
||||||
return count_joined
|
return count_joined
|
||||||
|
|
Loading…
Reference in New Issue