Add age, gender, and language as features.
Move calculation of age from merge_baseline_data.py to baseline_features.py.
parent
176367631b
commit
07da6be398
|
@ -634,5 +634,5 @@ PARAMS_FOR_ANALYSIS:
|
|||
results-survey358134_final.csv, # Belgium 1
|
||||
results-survey413767_final.csv # Belgium 2
|
||||
]
|
||||
FEATURES: [age, gender]
|
||||
FEATURES: [age, gender, startlanguage]
|
||||
CATEGORICAL_FEATURES: [gender]
|
||||
|
|
|
@ -4,3 +4,20 @@ pid = snakemake.params["pid"]
|
|||
requested_features = snakemake.params["features"]
# Start from an empty frame whose columns are the requested features, so the
# written CSV has those columns even when no participant row is filled in.
baseline_features = pd.DataFrame(columns=requested_features)

participant_info = pd.read_csv(snakemake.input[0], parse_dates=["date_of_birth"])
if not participant_info.empty:
    # Only the first row (index label 0) of the participant file is used.
    if "age" in requested_features:
        now = pd.Timestamp("now")
        # `.loc[0, "date_of_birth"]` is a scalar, so the difference is a single
        # Timedelta. Its day count is read with `.days`; the `.dt` accessor
        # exists only on Series and would raise AttributeError here.
        baseline_features.loc[0, "age"] = (
            now - participant_info.loc[0, "date_of_birth"]
        ).days / 365.25245
    if "gender" in requested_features:
        baseline_features.loc[0, "gender"] = participant_info.loc[0, "gender"]
    if "startlanguage" in requested_features:
        baseline_features.loc[0, "startlanguage"] = participant_info.loc[
            0, "startlanguage"
        ]

baseline_features.to_csv(
    snakemake.output[0], index=False, encoding="utf-8",
)
|
||||
|
|
|
@ -24,10 +24,6 @@ baseline = (
|
|||
)
|
||||
|
||||
baseline.rename(columns=VARIABLES_TO_TRANSLATE, copy=False, inplace=True)
|
||||
now = pd.Timestamp("now")
|
||||
baseline = baseline.assign(
|
||||
age=lambda x: (now - x.date_of_birth).dt.days / 365.25245,
|
||||
)
|
||||
|
||||
baseline.to_csv(snakemake.output[0],
|
||||
index=False,
|
||||
|
|
Loading…
Reference in New Issue