Save results.
parent
ae2d7a038d
commit
71e1fcf8ca
|
@ -9,3 +9,5 @@ __pycache__/
|
||||||
/data/features/
|
/data/features/
|
||||||
/data/baseline/
|
/data/baseline/
|
||||||
/data/*input*.csv
|
/data/*input*.csv
|
||||||
|
/data/daily*
|
||||||
|
/data/intradaily*
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -16,7 +16,6 @@ from sklearn.metrics import mean_squared_error, r2_score
|
||||||
from sklearn.impute import SimpleImputer
|
from sklearn.impute import SimpleImputer
|
||||||
from sklearn.dummy import DummyRegressor
|
from sklearn.dummy import DummyRegressor
|
||||||
from sklearn.decomposition import PCA
|
from sklearn.decomposition import PCA
|
||||||
import xgboost as xg
|
|
||||||
from IPython.core.interactiveshell import InteractiveShell
|
from IPython.core.interactiveshell import InteractiveShell
|
||||||
InteractiveShell.ast_node_interactivity = "all"
|
InteractiveShell.ast_node_interactivity = "all"
|
||||||
|
|
||||||
|
@ -27,7 +26,10 @@ if nb_dir not in sys.path:
|
||||||
import machine_learning.helper
|
import machine_learning.helper
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
csv_name = "./data/daily_18_hours_all_targets/input_JCQ_job_demand_mean.csv"
|
segment = "intradaily_30_min"
|
||||||
|
target = "JCQ_job_demand"
|
||||||
|
csv_name = "./data/" + segment + "_all_targets/input_" + target + "_mean.csv"
|
||||||
|
#csv_name = "./data/daily_18_hours_all_targets/input_JCQ_job_demand_mean.csv"
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_x, data_y, data_groups = machine_learning.helper.prepare_model_input(csv_name)
|
data_x, data_y, data_groups = machine_learning.helper.prepare_model_input(csv_name)
|
||||||
|
@ -66,7 +68,9 @@ print("Negative Mean Absolute Error", np.median(lin_reg_scores['test_neg_mean_ab
|
||||||
print("Negative Root Mean Squared Error", np.median(lin_reg_scores['test_neg_root_mean_squared_error']))
|
print("Negative Root Mean Squared Error", np.median(lin_reg_scores['test_neg_root_mean_squared_error']))
|
||||||
print("R2", np.median(lin_reg_scores['test_r2']))
|
print("R2", np.median(lin_reg_scores['test_r2']))
|
||||||
|
|
||||||
|
##################
|
||||||
# %%
|
# %%
|
||||||
|
chosen_model = "Random Forest"
|
||||||
rfr = ensemble.RandomForestRegressor(max_features=0.3, n_jobs=-1)
|
rfr = ensemble.RandomForestRegressor(max_features=0.3, n_jobs=-1)
|
||||||
rfr_score = cross_validate(
|
rfr_score = cross_validate(
|
||||||
rfr,
|
rfr,
|
||||||
|
@ -84,13 +88,25 @@ print("R2", np.median(rfr_score['test_r2']))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
y_predicted = cross_val_predict(rfr, data_x, data_y, groups=data_groups, cv=logo)
|
y_predicted = cross_val_predict(rfr, data_x, data_y, groups=data_groups, cv=logo)
|
||||||
|
#########################
|
||||||
|
# %%
|
||||||
|
chosen_model = "Bayesian Ridge"
|
||||||
|
bayesian_ridge_reg = linear_model.BayesianRidge()
|
||||||
|
bayesian_ridge_reg_score = cross_validate(
|
||||||
|
bayesian_ridge_reg,
|
||||||
|
X=data_x,
|
||||||
|
y=data_y,
|
||||||
|
groups=data_groups,
|
||||||
|
cv=logo,
|
||||||
|
n_jobs=-1,
|
||||||
|
scoring=('r2', 'neg_mean_squared_error', 'neg_mean_absolute_error', 'neg_root_mean_squared_error')
|
||||||
|
)
|
||||||
|
print("Negative Mean Absolute Error", np.median(bayesian_ridge_reg_score['test_neg_mean_absolute_error']))
|
||||||
|
print("Negative Root Mean Squared Error", np.median(bayesian_ridge_reg_score['test_neg_root_mean_squared_error']))
|
||||||
|
print("R2", np.median(bayesian_ridge_reg_score['test_r2']))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
g1 = sns.relplot(data=data_y, x="y_true", y="y_predicted")
|
y_predicted = cross_val_predict(bayesian_ridge_reg, data_x, data_y, groups=data_groups, cv=logo)
|
||||||
#g1.set_axis_labels("true", "predicted")
|
|
||||||
g1.set(title="Negative affect, Random Forest")
|
|
||||||
display(g1)
|
|
||||||
g1.savefig("d18NArfr_relplot.pdf")
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_y = pd.DataFrame(pd.concat([data_y, data_groups], axis=1))
|
data_y = pd.DataFrame(pd.concat([data_y, data_groups], axis=1))
|
||||||
|
@ -100,6 +116,14 @@ data_y["y_predicted"] = y_predicted
|
||||||
# %%
|
# %%
|
||||||
data_y.head()
|
data_y.head()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
g1 = sns.relplot(data=data_y, x="y_true", y="y_predicted")
|
||||||
|
#g1.set_axis_labels("true", "predicted")
|
||||||
|
#g1.map(plt.axhline, y=0, color=".7", dashes=(2, 1), zorder=0)
|
||||||
|
#g1.map(plt.axline, xy1=(0,0), slope=1)
|
||||||
|
g1.set(title=",".join([segment, target, chosen_model]))
|
||||||
|
display(g1)
|
||||||
|
g1.savefig("_".join([segment, target, chosen_model, "_relplot.pdf"]))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_y_long = pd.wide_to_long(
|
data_y_long = pd.wide_to_long(
|
||||||
|
@ -116,8 +140,8 @@ data_y_long.head()
|
||||||
# %%
|
# %%
|
||||||
g2 = sns.displot(data_y_long, x="y", hue="value", binwidth=0.1, height=5, aspect=1.5)
|
g2 = sns.displot(data_y_long, x="y", hue="value", binwidth=0.1, height=5, aspect=1.5)
|
||||||
sns.move_legend(g2, "upper left", bbox_to_anchor=(.55, .45))
|
sns.move_legend(g2, "upper left", bbox_to_anchor=(.55, .45))
|
||||||
g2.set(title="Negative affect, Random Forest")
|
g2.set(title=",".join([segment, target, chosen_model]))
|
||||||
g2.savefig("d18NArfr_hist.pdf")
|
g2.savefig("_".join([segment, target, chosen_model, "hist.pdf"]))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
pca = PCA(n_components=2)
|
pca = PCA(n_components=2)
|
||||||
|
@ -133,6 +157,7 @@ data_pca
|
||||||
# %%
|
# %%
|
||||||
|
|
||||||
g3 = sns.relplot(data = data_pca, x = "pca_0", y = "pca_1", hue = "y_true", palette = sns.color_palette("Spectral", as_cmap=True))
|
g3 = sns.relplot(data = data_pca, x = "pca_0", y = "pca_1", hue = "y_true", palette = sns.color_palette("Spectral", as_cmap=True))
|
||||||
g3.savefig("d18NArfr_PCA.pdf")
|
g3.set(title=",".join([segment, target, chosen_model]) + "\n variance explained = " + str(round(sum(pca.explained_variance_ratio_), 2)))
|
||||||
|
g3.savefig("_".join([segment, target, chosen_model, "_PCA.pdf"]))
|
||||||
|
|
||||||
# %%
|
# %%
|
Loading…
Reference in New Issue