Try a different approach for preprocessing ESMs.
It is important that this follows the generic RAPIDS pattern: the subsequent feature-calculation step expects a specific file and folder structure under data/interim. See find_features_files() in rules/common.smk.
labels
parent
ed298a9479
commit
99245afca3
|
@ -168,8 +168,7 @@ for provider in config["PHONE_ESM"]["PROVIDERS"].keys():
|
|||
if config["PHONE_ESM"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_esm_raw.csv",pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_esm_with_datetime.csv",pid=config["PIDS"]))
|
||||
for scale in config["PHONE_ESM"]["PROVIDERS"][provider]["SCALES"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_esm_{scale}_clean.csv",pid=config["PIDS"],scale=scale))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_esm_features/phone_esm_clean_{provider_key}.csv",pid=config["PIDS"],provider_key=provider.lower()))
|
||||
#files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
|
||||
#files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||
|
||||
|
|
|
@ -327,8 +327,10 @@ rule conversation_r_features:
|
|||
rule preprocess_esm:
|
||||
input: "data/raw/{pid}/phone_esm_with_datetime.csv"
|
||||
params:
|
||||
questionnaire_name = "{scale}"
|
||||
output: "data/interim/{pid}/phone_esm_{scale}_clean.csv"
|
||||
provider=lambda wildcards: config["PHONE_ESM"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key="{provider_key}",
|
||||
sensor_key="phone_esm"
|
||||
output: "data/interim/{pid}/phone_esm_features/phone_esm_clean_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/phone_esm/straw/preprocess.py"
|
||||
|
||||
|
|
|
@ -1,21 +1,24 @@
|
|||
from esm_preprocess import *
|
||||
from esm_JCQ import reverse_jcq_demand_control_scoring
|
||||
|
||||
questionnaire_name = snakemake.params["questionnaire_name"]
|
||||
requested_scales = provider["SCALES"]
|
||||
|
||||
df_esm = pd.read_csv(snakemake.input[0])
|
||||
df_esm_preprocessed = preprocess_esm(df_esm)
|
||||
|
||||
try:
|
||||
questionnaire_id = QUESTIONNAIRE_IDS[questionnaire_name]
|
||||
except ValueError:
|
||||
if not all([scale in QUESTIONNAIRE_IDS for scale in requested_scales]):
|
||||
unknown_scales = set(requested_scales) - set(QUESTIONNAIRE_IDS.keys())
|
||||
print("The requested questionnaire name should be one of the following:")
|
||||
print(QUESTIONNAIRE_IDS.keys())
|
||||
else:
|
||||
df_esm_selected = df_esm_preprocessed[df_esm_preprocessed["questionnaire_id"] == questionnaire_id]
|
||||
df_esm_clean = clean_up_esm(df_esm_selected)
|
||||
if questionnaire_name.startswith("JCQ"):
|
||||
df_esm_reversed = reverse_jcq_demand_control_scoring(df_esm_clean)
|
||||
df_esm_reversed.to_csv(snakemake.output[0])
|
||||
else:
|
||||
df_esm_clean.to_csv(snakemake.output[0])
|
||||
raise ValueError("You requested scales not collected: ", unknown_scales)
|
||||
|
||||
df_esm_clean = clean_up_esm(df_esm_preprocessed)
|
||||
df_esm_clean["esm_user_score"] = df_esm_clean["esm_user_answer_numeric"]
|
||||
|
||||
for scale in requested_scales:
|
||||
questionnaire_id = QUESTIONNAIRE_IDS[scale]
|
||||
mask = df_esm_clean["questionnaire_id"] == questionnaire_id
|
||||
if scale.startswith("JCQ"):
|
||||
df_esm_clean.loc[mask] = reverse_jcq_demand_control_scoring(df_esm_clean.loc[mask])
|
||||
|
||||
df_esm_clean.to_csv(snakemake.output[0])
|
||||
|
|
Loading…
Reference in New Issue