Specify numeric_only for pandas.core.groupby.DataFrameGroupBy.mean.
This parameter used to be None by default, but this usage is deprecated since pandas 2.0. See [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.groupby.DataFrameGroupBy.mean.html): > Changed in version 2.0.0: numeric_only no longer accepts None and defaults to False.update_libs
parent
d678be0641
commit
1d903f3629
|
@ -15,13 +15,13 @@ def extract_second_order_features(intraday_features, so_features_names, prefix="
|
||||||
so_features = pd.DataFrame()
|
so_features = pd.DataFrame()
|
||||||
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
|
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
|
||||||
if "mean" in so_features_names:
|
if "mean" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean().add_suffix("_SO_mean")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean(numeric_only=True).add_suffix("_SO_mean")], axis=1)
|
||||||
|
|
||||||
if "median" in so_features_names:
|
if "median" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median().add_suffix("_SO_median")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median(numeric_only=True).add_suffix("_SO_median")], axis=1)
|
||||||
|
|
||||||
if "sd" in so_features_names:
|
if "sd" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std().fillna(0).add_suffix("_SO_sd")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std(numeric_only=True).fillna(0).add_suffix("_SO_sd")], axis=1)
|
||||||
|
|
||||||
if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
|
if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
|
||||||
for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
|
for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
|
||||||
|
|
|
@ -26,7 +26,7 @@ def calculate_empatica_data_yield(features): # TODO
|
||||||
# Assigns 1 to values that are over 1 (in case of windows not being filled fully)
|
# Assigns 1 to values that are over 1 (in case of windows not being filled fully)
|
||||||
features[empatica_data_yield_cols] = features[empatica_data_yield_cols].apply(lambda x: [y if y <= 1 or np.isnan(y) else 1 for y in x])
|
features[empatica_data_yield_cols] = features[empatica_data_yield_cols].apply(lambda x: [y if y <= 1 or np.isnan(y) else 1 for y in x])
|
||||||
|
|
||||||
features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1).fillna(0)
|
features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1, numeric_only=True).fillna(0)
|
||||||
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
|
@ -140,8 +140,8 @@ def extract_ers(esm_df):
|
||||||
|
|
||||||
# Extracted 3 targets that will be transfered in the csv file to the cleaning script.
|
# Extracted 3 targets that will be transfered in the csv file to the cleaning script.
|
||||||
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
|
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
|
||||||
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
|
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
|
||||||
se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
|
se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
|
||||||
|
|
||||||
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
|
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
|
||||||
extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
||||||
|
|
Loading…
Reference in New Issue