Add a parameter for grouping.

communication
junos 2021-08-12 15:07:20 +02:00
parent 98f1df81c6
commit c8bb481508
2 changed files with 26 additions and 2 deletions

View File

@ -61,6 +61,9 @@ df_proximity = proximity.get_proximity_data(ptcp_2)
df_proximity = helper.get_date_from_timestamp(df_proximity) df_proximity = helper.get_date_from_timestamp(df_proximity)
df_proximity = proximity.recode_proximity(df_proximity) df_proximity = proximity.recode_proximity(df_proximity)
# %% [markdown]
# ## 1.3 Standardization/personalization
# %% [markdown] # %% [markdown]
# # 2. Grouping/segmentation # # 2. Grouping/segmentation
@ -71,3 +74,19 @@ df_esm_PANAS_daily_means = (
.reset_index() .reset_index()
.rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"}) .rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
) )
# %%
df_proximity_daily_counts = proximity.count_proximity(
df_proximity, ["participant_id", "date_lj"]
)
# %%
df_proximity_daily_counts
# %% [markdown]
# # 3. Join features (and export to csv?)
# %% [markdown]
# # 4. Machine learning model and parameters
# %%

View File

@ -55,7 +55,9 @@ def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
return df_proximity return df_proximity
def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame: def count_proximity(
df_proximity: pd.DataFrame, group_by: Collection = ["participant_id"]
) -> pd.DataFrame:
""" """
The function counts how many times a "near" value occurs in proximity The function counts how many times a "near" value occurs in proximity
and calculates the proportion of these counts to all proximity values (i.e. relative count). and calculates the proportion of these counts to all proximity values (i.e. relative count).
@ -64,6 +66,9 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
---------- ----------
df_proximity: pd.DataFrame df_proximity: pd.DataFrame
A dataframe of proximity data. A dataframe of proximity data.
group_by: Collection
A collection of column names (strings) specifying which columns to group by.
By default, the features are calculated per participant, but other columns such as "date_lj" can be included.
Returns Returns
------- -------
@ -73,7 +78,7 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
if "bool_prox_near" not in df_proximity: if "bool_prox_near" not in df_proximity:
df_proximity = recode_proximity(df_proximity) df_proximity = recode_proximity(df_proximity)
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"] df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
df_proximity_features = df_proximity.groupby("participant_id").sum()[ df_proximity_features = df_proximity.groupby(group_by).sum()[
["bool_prox_near", "bool_prox_far"] ["bool_prox_near", "bool_prox_far"]
] ]
df_proximity_features = df_proximity_features.assign( df_proximity_features = df_proximity_features.assign(