Add a parameter for grouping.
parent
98f1df81c6
commit
c8bb481508
|
@ -61,6 +61,9 @@ df_proximity = proximity.get_proximity_data(ptcp_2)
|
||||||
df_proximity = helper.get_date_from_timestamp(df_proximity)
|
df_proximity = helper.get_date_from_timestamp(df_proximity)
|
||||||
df_proximity = proximity.recode_proximity(df_proximity)
|
df_proximity = proximity.recode_proximity(df_proximity)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# ## 1.3 Standardization/personalization
|
||||||
|
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# # 2. Grouping/segmentation
|
# # 2. Grouping/segmentation
|
||||||
|
|
||||||
|
@ -71,3 +74,19 @@ df_esm_PANAS_daily_means = (
|
||||||
.reset_index()
|
.reset_index()
|
||||||
.rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
|
.rename(columns={"esm_user_answer_numeric": "esm_numeric_mean"})
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_proximity_daily_counts = proximity.count_proximity(
|
||||||
|
df_proximity, ["participant_id", "date_lj"]
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_proximity_daily_counts
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # 3. Join features (and export to csv?)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # 4. Machine learning model and parameters
|
||||||
|
|
||||||
|
# %%
|
||||||
|
|
|
@ -55,7 +55,9 @@ def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
||||||
return df_proximity
|
return df_proximity
|
||||||
|
|
||||||
|
|
||||||
def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
def count_proximity(
|
||||||
|
df_proximity: pd.DataFrame, group_by: Collection = ["participant_id"]
|
||||||
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
The function counts how many times a "near" value occurs in proximity
|
The function counts how many times a "near" value occurs in proximity
|
||||||
and calculates the proportion of this count to all proximity values (i.e. the relative count).
|
and calculates the proportion of this count to all proximity values (i.e. the relative count).
|
||||||
|
@ -64,6 +66,9 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
||||||
----------
|
----------
|
||||||
df_proximity: pd.DataFrame
|
df_proximity: pd.DataFrame
|
||||||
A dataframe of proximity data.
|
A dataframe of proximity data.
|
||||||
|
group_by: Collection
|
||||||
|
A list of strings, specifying by which parameters to group.
|
||||||
|
By default, the features are calculated per participant, but other columns such as "date_lj" can be added to group more finely.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
@ -73,7 +78,7 @@ def count_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
|
||||||
if "bool_prox_near" not in df_proximity:
|
if "bool_prox_near" not in df_proximity:
|
||||||
df_proximity = recode_proximity(df_proximity)
|
df_proximity = recode_proximity(df_proximity)
|
||||||
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
|
df_proximity["bool_prox_far"] = ~df_proximity["bool_prox_near"]
|
||||||
df_proximity_features = df_proximity.groupby("participant_id").sum()[
|
df_proximity_features = df_proximity.groupby(group_by).sum()[
|
||||||
["bool_prox_near", "bool_prox_far"]
|
["bool_prox_near", "bool_prox_far"]
|
||||||
]
|
]
|
||||||
df_proximity_features = df_proximity_features.assign(
|
df_proximity_features = df_proximity_features.assign(
|
||||||
|
|
Loading…
Reference in New Issue