Add 18-hour daily data and slightly modify the Jupyter script.

ml_pipeline
Primoz 2022-10-18 10:29:59 +02:00
parent cdff4da930
commit 9f7fa0c8e0
13 changed files with 4002 additions and 1 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -50,7 +50,7 @@ import machine_learning.model
# ## PANAS negative affect
# %% jupyter={"source_hidden": true}
model_input = pd.read_csv("../data/input_PANAS_negative_affect_mean.csv")
model_input = pd.read_csv("../data/daily_18_hours_all_targets/input_PANAS_negative_affect_mean.csv")
# %% jupyter={"source_hidden": true}
index_columns = ["local_segment", "local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]
@@ -105,6 +105,9 @@ sum(data_y.isna())
# ### Baseline: Dummy Regression (mean)
dummy_regr = DummyRegressor(strategy="mean")
# %% jupyter={"source_hidden": true}
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# %% jupyter={"source_hidden": true}
lin_reg_scores = cross_validate(
dummy_regr,