diff --git a/.gitignore b/.gitignore index 0275f1e..71f7604 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ __pycache__/ /data/features/ /data/baseline/ /data/*input*.csv +/data/daily* +/data/intradaily* diff --git a/machine_learning/final_results/d18NArfr_PCA.pdf b/machine_learning/final_results/d18NArfr_PCA.pdf new file mode 100644 index 0000000..17fa422 Binary files /dev/null and b/machine_learning/final_results/d18NArfr_PCA.pdf differ diff --git a/machine_learning/final_results/d18NArfr_hist.pdf b/machine_learning/final_results/d18NArfr_hist.pdf new file mode 100644 index 0000000..e4122d6 Binary files /dev/null and b/machine_learning/final_results/d18NArfr_hist.pdf differ diff --git a/machine_learning/final_results/d18NArfr_relplot.pdf b/machine_learning/final_results/d18NArfr_relplot.pdf new file mode 100644 index 0000000..b61986d Binary files /dev/null and b/machine_learning/final_results/d18NArfr_relplot.pdf differ diff --git a/machine_learning/final_results/d18demandBayRidge_PCA.pdf b/machine_learning/final_results/d18demandBayRidge_PCA.pdf new file mode 100644 index 0000000..518ee4e Binary files /dev/null and b/machine_learning/final_results/d18demandBayRidge_PCA.pdf differ diff --git a/machine_learning/final_results/d18demandBayRidge_relplot.pdf b/machine_learning/final_results/d18demandBayRidge_relplot.pdf new file mode 100644 index 0000000..355ade6 Binary files /dev/null and b/machine_learning/final_results/d18demandBayRidge_relplot.pdf differ diff --git a/machine_learning/final_results/d18demandBayridge_hist.pdf b/machine_learning/final_results/d18demandBayridge_hist.pdf new file mode 100644 index 0000000..1f2c1b8 Binary files /dev/null and b/machine_learning/final_results/d18demandBayridge_hist.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__PCA.pdf b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__PCA.pdf new file mode 100644 index 0000000..798e89e Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__PCA.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__relplot.pdf b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__relplot.pdf new file mode 100644 index 0000000..f8d7b71 Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge__relplot.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge_hist.pdf b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge_hist.pdf new file mode 100644 index 0000000..df53cca Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_JCQ_job_demand_Bayesian Ridge_hist.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__PCA.pdf b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__PCA.pdf new file mode 100644 index 0000000..a82dfb5 Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__PCA.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__relplot.pdf b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__relplot.pdf new file mode 100644 index 0000000..be248dc Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge__relplot.pdf differ diff --git a/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge_hist.pdf b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge_hist.pdf new file mode 100644 index 0000000..07879e9 Binary files /dev/null and b/machine_learning/final_results/daily_24_hours_PANAS_negative_affect_Bayesian Ridge_hist.pdf differ diff --git a/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__PCA.pdf b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__PCA.pdf new file mode 100644 index 0000000..7c0f4fb Binary files /dev/null and b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__PCA.pdf differ diff --git a/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__relplot.pdf b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__relplot.pdf new file mode 100644 index 0000000..0058145 Binary files /dev/null and b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge__relplot.pdf differ diff --git a/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge_hist.pdf b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge_hist.pdf new file mode 100644 index 0000000..232f19d Binary files /dev/null and b/machine_learning/final_results/intradaily_30_min_JCQ_job_demand_Bayesian Ridge_hist.pdf differ diff --git a/machine_learning/prox_comm_PANAS_nb.ipynb b/machine_learning/final_results/prox_comm_PANAS_nb.ipynb similarity index 100% rename from machine_learning/prox_comm_PANAS_nb.ipynb rename to machine_learning/final_results/prox_comm_PANAS_nb.ipynb diff --git a/machine_learning/results_presentation.ipynb b/machine_learning/final_results/results_presentation.ipynb similarity index 69% rename from machine_learning/results_presentation.ipynb rename to machine_learning/final_results/results_presentation.ipynb index 0a7f78f..5ad20fe 100644 --- a/machine_learning/results_presentation.ipynb +++ b/machine_learning/final_results/results_presentation.ipynb @@ -16,7 +16,6 @@ from sklearn.metrics import mean_squared_error, r2_score from sklearn.impute import SimpleImputer from sklearn.dummy import DummyRegressor from sklearn.decomposition import PCA -import xgboost as xg from IPython.core.interactiveshell import InteractiveShell InteractiveShell.ast_node_interactivity = "all" @@ -27,7 +26,10 @@ if nb_dir not in sys.path: import machine_learning.helper # %% -csv_name = "./data/daily_18_hours_all_targets/input_JCQ_job_demand_mean.csv" +segment = "intradaily_30_min" +target = "JCQ_job_demand" +csv_name = "./data/" + segment + "_all_targets/input_" + target + "_mean.csv" +#csv_name = "./data/daily_18_hours_all_targets/input_JCQ_job_demand_mean.csv" # %% data_x, data_y, data_groups = machine_learning.helper.prepare_model_input(csv_name) @@ -66,7 +68,9 @@ print("Negative Mean Absolute Error", np.median(lin_reg_scores['test_neg_mean_ab print("Negative Root Mean Squared Error", np.median(lin_reg_scores['test_neg_root_mean_squared_error'])) print("R2", np.median(lin_reg_scores['test_r2'])) +################## # %% +chosen_model = "Random Forest" rfr = ensemble.RandomForestRegressor(max_features=0.3, n_jobs=-1) rfr_score = cross_validate( rfr, @@ -84,13 +88,25 @@ print("R2", np.median(rfr_score['test_r2'])) # %% y_predicted = cross_val_predict(rfr, data_x, data_y, groups=data_groups, cv=logo) +######################### +# %% +chosen_model = "Bayesian Ridge" +bayesian_ridge_reg = linear_model.BayesianRidge() +bayesian_ridge_reg_score = cross_validate( + bayesian_ridge_reg, + X=data_x, + y=data_y, + groups=data_groups, + cv=logo, + n_jobs=-1, + scoring=('r2', 'neg_mean_squared_error', 'neg_mean_absolute_error', 'neg_root_mean_squared_error') +) +print("Negative Mean Absolute Error", np.median(bayesian_ridge_reg_score['test_neg_mean_absolute_error'])) +print("Negative Root Mean Squared Error", np.median(bayesian_ridge_reg_score['test_neg_root_mean_squared_error'])) +print("R2", np.median(bayesian_ridge_reg_score['test_r2'])) # %% -g1 = sns.relplot(data=data_y, x="y_true", y="y_predicted") -#g1.set_axis_labels("true", "predicted") -g1.set(title="Negative affect, Random Forest") -display(g1) -g1.savefig("d18NArfr_relplot.pdf") +y_predicted = cross_val_predict(bayesian_ridge_reg, data_x, data_y, groups=data_groups, cv=logo) # %% data_y = pd.DataFrame(pd.concat([data_y, data_groups], axis=1)) @@ -100,6 +116,14 @@ data_y["y_predicted"] = y_predicted # %% data_y.head() +# %% +g1 = sns.relplot(data=data_y, x="y_true", y="y_predicted") +#g1.set_axis_labels("true", "predicted") +#g1.map(plt.axhline, y=0, color=".7", dashes=(2, 1), zorder=0) +#g1.map(plt.axline, xy1=(0,0), slope=1) +g1.set(title=",".join([segment, target, chosen_model])) +display(g1) +g1.savefig("_".join([segment, target, chosen_model, "_relplot.pdf"])) # %% data_y_long = pd.wide_to_long( @@ -116,8 +140,8 @@ data_y_long.head() # %% g2 = sns.displot(data_y_long, x="y", hue="value", binwidth=0.1, height=5, aspect=1.5) sns.move_legend(g2, "upper left", bbox_to_anchor=(.55, .45)) -g2.set(title="Negative affect, Random Forest") -g2.savefig("d18NArfr_hist.pdf") +g2.set(title=",".join([segment, target, chosen_model])) +g2.savefig("_".join([segment, target, chosen_model, "hist.pdf"])) # %% pca = PCA(n_components=2) @@ -133,6 +157,7 @@ data_pca # %% g3 = sns.relplot(data = data_pca, x = "pca_0", y = "pca_1", hue = "y_true", palette = sns.color_palette("Spectral", as_cmap=True)) -g3.savefig("d18NArfr_PCA.pdf") +g3.set(title=",".join([segment, target, chosen_model]) + "\n variance explained = " + str(round(sum(pca.explained_variance_ratio_), 2))) +g3.savefig("_".join([segment, target, chosen_model, "_PCA.pdf"])) # %%