diff --git a/exploration/expl_proximity.py b/exploration/expl_proximity.py index f3be825..fe85124 100644 --- a/exploration/expl_proximity.py +++ b/exploration/expl_proximity.py @@ -16,6 +16,7 @@ # %% # %matplotlib inline import datetime +import importlib import os import sys @@ -32,13 +33,16 @@ import participants.query_db TZ_LJ = timezone("Europe/Ljubljana") # %% -from features.proximity import * +from features import helper, proximity + +# %% +importlib.reload(proximity) # %% [markdown] # # Basic characteristics # %% -df_proximity_nokia = get_proximity_data(["nokia_0000003"]) +df_proximity_nokia = proximity.get_proximity_data(["nokia_0000003"]) print(df_proximity_nokia) # %% @@ -53,7 +57,7 @@ df_proximity_nokia.double_proximity.value_counts() # %% participants_inactive_usernames = participants.query_db.get_usernames() -df_proximity_inactive = get_proximity_data(participants_inactive_usernames) +df_proximity_inactive = proximity.get_proximity_data(participants_inactive_usernames) # %% df_proximity_inactive.double_proximity.describe() @@ -110,3 +114,13 @@ df_proximity_combinations[ (df_proximity_combinations[5.0] != 0) & (df_proximity_combinations[5.00030517578125] != 0) ] + +# %% [markdown] +# # Features + +# %% +df_proximity_inactive = helper.get_date_from_timestamp(df_proximity_inactive) + +# %% +df_proximity_features = proximity.count_proximity(df_proximity_inactive, ["date_lj"]) +display(df_proximity_features) diff --git a/features/proximity.py b/features/proximity.py index aabef74..1884f29 100644 --- a/features/proximity.py +++ b/features/proximity.py @@ -78,11 +78,11 @@ def count_proximity( A dataframe with the count of "near" proximity values and their relative count. """ if group_by is None: - group_by = ["participant_id"] + group_by = [] if "bool_prox_near" not in df_proximity: df_proximity = recode_proximity(df_proximity) df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"] - df_proximity_features = df_proximity.groupby(group_by).sum()[ + df_proximity_features = df_proximity.groupby(["participant_id"] + group_by).sum()[ ["bool_prox_near", "bool_prox_far"] ] df_proximity_features = df_proximity_features.assign(