Add a parameter for grouping.

communication
junos 2021-08-12 15:07:20 +02:00
parent 98f1df81c6
commit c8bb481508
2 changed files with 26 additions and 2 deletions

View File

@ -61,6 +61,9 @@ df_proximity = proximity.get_proximity_data(ptcp_2)
df_proximity = helper.get_date_from_timestamp(df_proximity)
df_proximity = proximity.recode_proximity(df_proximity)
# %% [markdown]
# ## 1.3 Standardization/personalization
# %% [markdown]
# # 2. Grouping/segmentation
@ -71,3 +74,19 @@ df_esm_PANAS_daily_means = (
.reset_index()
.rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
)
# %%
df_proximity_daily_counts = proximity.count_proximity(
df_proximity, ["participant_id", "date_lj"]
)
# %%
df_proximity_daily_counts
# %% [markdown]
# # 3. Join features (and export to csv?)
# %% [markdown]
# # 4. Machine learning model and parameters
# %%

View File

@ -55,7 +55,9 @@ def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
return df_proximity
def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
def count_proximity(
df_proximity: pd.DataFrame, group_by: Collection = ["participant_id"]
) -> pd.DataFrame:
"""
The function counts how many times a "near" value occurs in proximity
and calculates the proportion of this counts to all proximity values (i.e. relative count).
@ -64,6 +66,9 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
----------
df_proximity: pd.DataFrame
A dataframe of proximity data.
group_by: Collection
A list of strings, specifying by which parameters to group.
By default, the features are calculated per participant, but could be "date_lj" etc.
Returns
-------
@ -73,7 +78,7 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
if "bool_prox_near" not in df_proximity:
df_proximity = recode_proximity(df_proximity)
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
df_proximity_features = df_proximity.groupby("participant_id").sum()[
df_proximity_features = df_proximity.groupby(group_by).sum()[
["bool_prox_near", "bool_prox_far"]
]
df_proximity_features = df_proximity_features.assign(