Make group_by in count_proximity consistent with communication.

rapids
junos 2021-08-20 17:52:31 +02:00
parent d6337e82ac
commit 72b16af75c
2 changed files with 19 additions and 5 deletions

View File

@@ -16,6 +16,7 @@
# %% # %%
# %matplotlib inline # %matplotlib inline
import datetime import datetime
import importlib
import os import os
import sys import sys
@@ -32,13 +33,16 @@ import participants.query_db
TZ_LJ = timezone("Europe/Ljubljana") TZ_LJ = timezone("Europe/Ljubljana")
# %% # %%
from features.proximity import * from features import helper, proximity
# %%
importlib.reload(proximity)
# %% [markdown] # %% [markdown]
# # Basic characteristics # # Basic characteristics
# %% # %%
df_proximity_nokia = get_proximity_data(["nokia_0000003"]) df_proximity_nokia = proximity.get_proximity_data(["nokia_0000003"])
print(df_proximity_nokia) print(df_proximity_nokia)
# %% # %%
@@ -53,7 +57,7 @@ df_proximity_nokia.double_proximity.value_counts()
# %% # %%
participants_inactive_usernames = participants.query_db.get_usernames() participants_inactive_usernames = participants.query_db.get_usernames()
df_proximity_inactive = get_proximity_data(participants_inactive_usernames) df_proximity_inactive = proximity.get_proximity_data(participants_inactive_usernames)
# %% # %%
df_proximity_inactive.double_proximity.describe() df_proximity_inactive.double_proximity.describe()
@@ -110,3 +114,13 @@ df_proximity_combinations[
(df_proximity_combinations[5.0] != 0) (df_proximity_combinations[5.0] != 0)
& (df_proximity_combinations[5.00030517578125] != 0) & (df_proximity_combinations[5.00030517578125] != 0)
] ]
# %% [markdown]
# # Features
# %%
df_proximity_inactive = helper.get_date_from_timestamp(df_proximity_inactive)
# %%
df_proximity_features = proximity.count_proximity(df_proximity_inactive, ["date_lj"])
display(df_proximity_features)

View File

@@ -78,11 +78,11 @@ def count_proximity(
A dataframe with the count of "near" proximity values and their relative count. A dataframe with the count of "near" proximity values and their relative count.
""" """
if group_by is None: if group_by is None:
group_by = ["participant_id"] group_by = []
if "bool_prox_near" not in df_proximity: if "bool_prox_near" not in df_proximity:
df_proximity = recode_proximity(df_proximity) df_proximity = recode_proximity(df_proximity)
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"] df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
df_proximity_features = df_proximity.groupby(group_by).sum()[ df_proximity_features = df_proximity.groupby(["participant_id"] + group_by).sum()[
["bool_prox_near", "bool_prox_far"] ["bool_prox_near", "bool_prox_far"]
] ]
df_proximity_features = df_proximity_features.assign( df_proximity_features = df_proximity_features.assign(