Extract one step of preparation into a separate function.

master
junos 2023-05-10 15:28:09 +02:00
parent caeaf03239
commit 24744c288d
1 changed file with 15 additions and 8 deletions

View File

@@ -73,7 +73,7 @@ def insert_row(df, row):
return pd.concat([df, pd.DataFrame([row], columns=df.columns)], ignore_index=True) return pd.concat([df, pd.DataFrame([row], columns=df.columns)], ignore_index=True)
def prepare_regression_model_input(model_input, cv_method="logo"): def prepare_sklearn_data_format(model_input, cv_method="logo"):
index_columns = [ index_columns = [
"local_segment", "local_segment",
"local_segment_label", "local_segment_label",
@@ -82,13 +82,7 @@ def prepare_regression_model_input(model_input, cv_method="logo"):
] ]
model_input.set_index(index_columns, inplace=True) model_input.set_index(index_columns, inplace=True)
if cv_method == "logo": if cv_method == "half_logo":
data_x, data_y, data_groups = (
model_input.drop(["target", "pid"], axis=1),
model_input["target"],
model_input["pid"],
)
else:
model_input["pid_index"] = model_input.groupby("pid").cumcount() model_input["pid_index"] = model_input.groupby("pid").cumcount()
model_input["pid_count"] = model_input.groupby("pid")["pid"].transform("count") model_input["pid_count"] = model_input.groupby("pid")["pid"].transform("count")
@@ -104,6 +98,19 @@ def prepare_regression_model_input(model_input, cv_method="logo"):
model_input["target"], model_input["target"],
model_input["pid_half"], model_input["pid_half"],
) )
else:
data_x, data_y, data_groups = (
model_input.drop(["target", "pid"], axis=1),
model_input["target"],
model_input["pid"],
)
return data_x, data_y, data_groups
def prepare_regression_model_input(model_input, cv_method="logo"):
data_x, data_y, data_groups = prepare_sklearn_data_format(
model_input, cv_method=cv_method
)
categorical_feature_colnames = [ categorical_feature_colnames = [
"gender", "gender",