Refactor analysis part of snakefile
parent
211aec1234
commit
8aa25144f0
93
Snakefile
93
Snakefile
|
@ -6,6 +6,8 @@ include: "rules/models.snakefile"
|
||||||
include: "rules/reports.snakefile"
|
include: "rules/reports.snakefile"
|
||||||
include: "rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
|
include: "rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
files_to_compute = []
|
files_to_compute = []
|
||||||
|
|
||||||
if len(config["PIDS"]) == 0:
|
if len(config["PIDS"]) == 0:
|
||||||
|
@ -94,6 +96,97 @@ if config["CONVERSATION"]["COMPUTE"]:
|
||||||
# TODO add files_to_compute.extend(optional_conversation_input(None)), the Android or iOS table gets processed depending on each participant
|
# TODO add files_to_compute.extend(optional_conversation_input(None)), the Android or iOS table gets processed depending on each participant
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/conversation_{segment}.csv",pid=config["PIDS"], segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/processed/{pid}/conversation_{segment}.csv",pid=config["PIDS"], segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
|
if config["PARAMS_FOR_ANALYSIS"]["COMPUTE"]:
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"]
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]
|
||||||
|
models, scalers, rows_nan_thresholds, cols_nan_thresholds = [], [], [], []
|
||||||
|
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
|
||||||
|
models = models + [model_name] * len(config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name]) * len(rows_nan_threshold)
|
||||||
|
scalers = scalers + config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name] * len(rows_nan_threshold)
|
||||||
|
rows_nan_thresholds = rows_nan_thresholds + list(itertools.chain.from_iterable([threshold] * len(config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name]) for threshold in rows_nan_threshold))
|
||||||
|
cols_nan_thresholds = cols_nan_thresholds + list(itertools.chain.from_iterable([threshold] * len(config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name]) for threshold in cols_nan_threshold))
|
||||||
|
results = config["PARAMS_FOR_ANALYSIS"]["RESULT_COMPONENTS"] + ["merged_population_model_results"]
|
||||||
|
|
||||||
|
files_to_compute.extend(expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv",
|
||||||
|
pid = config["PIDS"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]))
|
||||||
|
files_to_compute.extend(expand("data/processed/data_for_population_model/{source}_{day_segment}_original.csv",
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/{pid}/data_for_individual_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
|
||||||
|
pid = config["PIDS"],
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/data_for_population_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]))
|
||||||
|
files_to_compute.extend(expand("data/processed/data_for_population_model/demographic_features.csv"))
|
||||||
|
files_to_compute.extend(expand("data/processed/data_for_population_model/targets_{summarised}.csv",
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/data_for_population_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/data_for_population_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}.csv",
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/output_population_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv",
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
|
||||||
|
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"]))
|
||||||
|
files_to_compute.extend(expand(
|
||||||
|
expand("data/processed/output_population_model/{{rows_nan_threshold}}|{{cols_nan_threshold}}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/{result}.csv",
|
||||||
|
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
|
||||||
|
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
|
||||||
|
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
|
||||||
|
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
|
||||||
|
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
|
||||||
|
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
|
||||||
|
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"],
|
||||||
|
result = results),
|
||||||
|
zip,
|
||||||
|
rows_nan_threshold = rows_nan_thresholds,
|
||||||
|
cols_nan_threshold = cols_nan_thresholds,
|
||||||
|
model = models,
|
||||||
|
scaler = scalers))
|
||||||
|
|
||||||
rule all:
|
rule all:
|
||||||
input:
|
input:
|
||||||
files_to_compute
|
files_to_compute
|
||||||
|
|
|
@ -183,6 +183,7 @@ CONVERSATION:
|
||||||
|
|
||||||
### Analysis ################################################################
|
### Analysis ################################################################
|
||||||
PARAMS_FOR_ANALYSIS:
|
PARAMS_FOR_ANALYSIS:
|
||||||
|
COMPUTE: False
|
||||||
GROUNDTRUTH_TABLE: participant_info
|
GROUNDTRUTH_TABLE: participant_info
|
||||||
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
|
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
|
@ -206,9 +207,9 @@ PARAMS_FOR_ANALYSIS:
|
||||||
DAYS_AFTER_DISCHARGE: 7
|
DAYS_AFTER_DISCHARGE: 7
|
||||||
|
|
||||||
# Cleaning Parameters
|
# Cleaning Parameters
|
||||||
COLS_NAN_THRESHOLD: 0.5
|
COLS_NAN_THRESHOLD: [0.1, 0.3, 0.5]
|
||||||
COLS_VAR_THRESHOLD: True
|
COLS_VAR_THRESHOLD: True
|
||||||
ROWS_NAN_THRESHOLD: 0.5
|
ROWS_NAN_THRESHOLD: [0.1, 0.3, 0.5]
|
||||||
PARTICIPANT_DAYS_BEFORE_THRESHOLD: 7
|
PARTICIPANT_DAYS_BEFORE_THRESHOLD: 7
|
||||||
PARTICIPANT_DAYS_AFTER_THRESHOLD: 4
|
PARTICIPANT_DAYS_AFTER_THRESHOLD: 4
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue