Add a parameter for grouping.
parent
98f1df81c6
commit
c8bb481508
|
@ -61,6 +61,9 @@ df_proximity = proximity.get_proximity_data(ptcp_2)
|
|||
df_proximity = helper.get_date_from_timestamp(df_proximity)
|
||||
df_proximity = proximity.recode_proximity(df_proximity)
|
||||
|
||||
# %% [markdown]
|
||||
# ## 1.3 Standardization/personalization
|
||||
|
||||
# %% [markdown]
|
||||
# # 2. Grouping/segmentation
|
||||
|
||||
|
@ -71,3 +74,19 @@ df_esm_PANAS_daily_means = (
|
|||
.reset_index()
|
||||
.rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
|
||||
)
|
||||
|
||||
# %%
|
||||
df_proximity_daily_counts = proximity.count_proximity(
|
||||
df_proximity, ["participant_id", "date_lj"]
|
||||
)
|
||||
|
||||
# %%
|
||||
df_proximity_daily_counts
|
||||
|
||||
# %% [markdown]
|
||||
# # 3. Join features (and export to csv?)
|
||||
|
||||
# %% [markdown]
|
||||
# # 4. Machine learning model and parameters
|
||||
|
||||
# %%
|
||||
|
|
|
@ -55,7 +55,9 @@ def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
|||
return df_proximity
|
||||
|
||||
|
||||
def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
||||
def count_proximity(
|
||||
df_proximity: pd.DataFrame, group_by: Collection = ["participant_id"]
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
The function counts how many times a "near" value occurs in proximity
|
||||
and calculates the proportion of these counts to all proximity values (i.e. relative count).
|
||||
|
@ -64,6 +66,9 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
|||
----------
|
||||
df_proximity: pd.DataFrame
|
||||
A dataframe of proximity data.
|
||||
group_by: Collection
|
||||
A list of strings, specifying by which parameters to group.
|
||||
By default, the features are calculated per participant, but other columns such as "date_lj" can be used for grouping as well.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
@ -73,7 +78,7 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
|||
if "bool_prox_near" not in df_proximity:
|
||||
df_proximity = recode_proximity(df_proximity)
|
||||
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
|
||||
df_proximity_features = df_proximity.groupby("participant_id").sum()[
|
||||
df_proximity_features = df_proximity.groupby(group_by).sum()[
|
||||
["bool_prox_near", "bool_prox_far"]
|
||||
]
|
||||
df_proximity_features = df_proximity_features.assign(
|
||||
|
|
Loading…
Reference in New Issue