Make group_by consistent with communication.

rapids
junos 2021-08-20 17:52:31 +02:00
parent d6337e82ac
commit 72b16af75c
2 changed files with 19 additions and 5 deletions

View File

@@ -16,6 +16,7 @@
# %%
# %matplotlib inline
import datetime
import importlib
import os
import sys
@@ -32,13 +33,16 @@ import participants.query_db
TZ_LJ = timezone("Europe/Ljubljana")
# %%
from features.proximity import *
from features import helper, proximity
# %%
importlib.reload(proximity)
# %% [markdown]
# # Basic characteristics
# %%
df_proximity_nokia = get_proximity_data(["nokia_0000003"])
df_proximity_nokia = proximity.get_proximity_data(["nokia_0000003"])
print(df_proximity_nokia)
# %%
@@ -53,7 +57,7 @@ df_proximity_nokia.double_proximity.value_counts()
# %%
participants_inactive_usernames = participants.query_db.get_usernames()
df_proximity_inactive = get_proximity_data(participants_inactive_usernames)
df_proximity_inactive = proximity.get_proximity_data(participants_inactive_usernames)
# %%
df_proximity_inactive.double_proximity.describe()
@@ -110,3 +114,13 @@ df_proximity_combinations[
(df_proximity_combinations[5.0] != 0)
& (df_proximity_combinations[5.00030517578125] != 0)
]
# %% [markdown]
# # Features
# %%
df_proximity_inactive = helper.get_date_from_timestamp(df_proximity_inactive)
# %%
df_proximity_features = proximity.count_proximity(df_proximity_inactive, ["date_lj"])
display(df_proximity_features)

View File

@@ -78,11 +78,11 @@ def count_proximity(
A dataframe with the count of "near" proximity values and their relative count.
"""
if group_by is None:
group_by = ["participant_id"]
group_by = []
if "bool_prox_near" not in df_proximity:
df_proximity = recode_proximity(df_proximity)
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
df_proximity_features = df_proximity.groupby(group_by).sum()[
df_proximity_features = df_proximity.groupby(["participant_id"] + group_by).sum()[
["bool_prox_near", "bool_prox_far"]
]
df_proximity_features = df_proximity_features.assign(