# rapids/rules/models.smk

rule download_demographic_data:
    input:
        participant_file = "data/external/participant_files/{pid}.yaml",
        data = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"]
    output:
        "data/raw/{pid}/participant_info_raw.csv"
    script:
        "../src/data/workflow_example/download_demographic_data.R"

rule demographic_features:
    input:
        participant_info = "data/raw/{pid}/participant_info_raw.csv"
    params:
        pid = "{pid}",
        features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"]
    output:
        "data/processed/features/{pid}/demographic_features.csv"
    script:
        "../src/features/workflow_example/demographic_features.py"

rule download_target_data:
    input:
        participant_file = "data/external/participant_files/{pid}.yaml",
        data = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["TARGET"]["CONTAINER"]
    output:
        "data/raw/{pid}/participant_target_raw.csv"
    script:
        "../src/data/workflow_example/download_target_data.R"

rule target_readable_datetime:
    input:
        sensor_input = "data/raw/{pid}/participant_target_raw.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "fitbit",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/raw/{pid}/participant_target_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

rule parse_targets:
    input:
        targets = "data/raw/{pid}/participant_target_with_datetime.csv",
        time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
    output:
        "data/processed/targets/{pid}/parsed_targets.csv"
    script:
        "../src/models/workflow_example/parse_targets.py"

rule merge_features_and_targets_for_individual_model:
    input:
        cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids.csv",
        targets = "data/processed/targets/{pid}/parsed_targets.csv",
    output:
        "data/processed/models/individual_model/{pid}/input.csv"
    script:
        "../src/models/workflow_example/merge_features_and_targets_for_individual_model.py"

rule merge_features_and_targets_for_population_model:
    input:
        cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",
        demographic_features = expand("data/processed/features/{pid}/demographic_features.csv", pid=config["PIDS"]),
        targets = expand("data/processed/targets/{pid}/parsed_targets.csv", pid=config["PIDS"]),
    output:
        "data/processed/models/population_model/input.csv"
    script:
        "../src/models/workflow_example/merge_features_and_targets_for_population_model.py"

rule baselines_for_individual_model:
    input:
        "data/processed/models/individual_model/{pid}/input.csv"
    params:
        cv_method = "{cv_method}",
        colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],
    output:
        "data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv"
    log:
        "data/processed/models/individual_model/{pid}/output_{cv_method}/baselines_notes.log"
    script:
        "../src/models/workflow_example/baselines.py"

rule baselines_for_population_model:
    input:
        "data/processed/models/population_model/input.csv"
    params:
        cv_method = "{cv_method}",
        colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],
    output:
        "data/processed/models/population_model/output_{cv_method}/baselines.csv"
    log:
        "data/processed/models/population_model/output_{cv_method}/baselines_notes.log"
    script:
        "../src/models/workflow_example/baselines.py"

rule modelling_for_individual_participants:
    input:
        data = "data/processed/models/individual_model/{pid}/input.csv"
    params:
        model = "{model}",
        cv_method = "{cv_method}",
        scaler = "{scaler}",
        categorical_operators = config["PARAMS_FOR_ANALYSIS"]["CATEGORICAL_OPERATORS"],
        categorical_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CATEGORICAL_FEATURES"],
        model_hyperparams = config["PARAMS_FOR_ANALYSIS"]["MODEL_HYPERPARAMS"],
    output:
        fold_predictions = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_predictions.csv",
        fold_metrics = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_metrics.csv",
        overall_results = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/overall_results.csv",
        fold_feature_importances = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_feature_importances.csv"
    log:
        "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/notes.log"
    script:
        "../src/models/workflow_example/modelling.py"

rule modelling_for_all_participants:
    input:
        data = "data/processed/models/population_model/input.csv"
    params:
        model = "{model}",
        cv_method = "{cv_method}",
        scaler = "{scaler}",
        categorical_operators = config["PARAMS_FOR_ANALYSIS"]["CATEGORICAL_OPERATORS"],
        categorical_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CATEGORICAL_FEATURES"],
        model_hyperparams = config["PARAMS_FOR_ANALYSIS"]["MODEL_HYPERPARAMS"],
    output:
        fold_predictions = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_predictions.csv",
        fold_metrics = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_metrics.csv",
        overall_results = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/overall_results.csv",
        fold_feature_importances = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_feature_importances.csv"
    log:
        "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/notes.log"
    script:
        "../src/models/workflow_example/modelling.py"
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule download_demographic_data:`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`input:`
Migrate analysis example to new data stream 2021-03-13 01:52:34 +01:00			`participant_file = "data/external/participant_files/{pid}.yaml",`
			`data = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CONTAINER"]`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/raw/{pid}/participant_info_raw.csv"`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/data/workflow_example/download_demographic_data.R"`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00
			`rule demographic_features:`
			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`participant_info = "data/raw/{pid}/participant_info_raw.csv"`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`params:`
			`pid = "{pid}",`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"]`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/features/{pid}/demographic_features.csv"`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/features/workflow_example/demographic_features.py"`
Delete mystudy.snakefile and add all rules to models.snakefile 2020-07-29 21:25:26 +02:00
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule download_target_data:`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00			`input:`
Migrate analysis example to new data stream 2021-03-13 01:52:34 +01:00			`participant_file = "data/external/participant_files/{pid}.yaml",`
			`data = config["PARAMS_FOR_ANALYSIS"]["TARGET"]["FOLDER"] + "/" + config["PARAMS_FOR_ANALYSIS"]["TARGET"]["CONTAINER"]`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/raw/{pid}/participant_target_raw.csv"`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/data/workflow_example/download_target_data.R"`
Add merge metrics module for analysis rules 2020-03-09 18:32:14 +01:00
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule target_readable_datetime:`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`sensor_input = "data/raw/{pid}/participant_target_raw.csv",`
Migrate analysis example to new data stream 2021-03-13 01:52:34 +01:00			`time_segments = "data/interim/time_segments/{pid}_time_segments.csv",`
			`pid_file = "data/external/participant_files/{pid}.yaml",`
			`tzcodes_file = input_tzcodes_file,`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`params:`
Migrate analysis example to new data stream 2021-03-13 01:52:34 +01:00			`device_type = "fitbit",`
			`timezone_parameters = config["TIMEZONE"],`
			`pid = "{pid}",`
Refactor day segments to time segments 2020-12-03 00:41:03 +01:00			`time_segments_type = config["TIME_SEGMENTS"]["TYPE"],`
			`include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/raw/{pid}/participant_target_with_datetime.csv"`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`script:`
Migrate analysis example to new data stream 2021-03-13 01:52:34 +01:00			`"../src/data/datetime/readable_datetime.R"`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule parse_targets:`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`targets = "data/raw/{pid}/participant_target_with_datetime.csv",`
Refactor day segments to time segments 2020-12-03 00:41:03 +01:00			`time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/targets/{pid}/parsed_targets.csv"`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/models/workflow_example/parse_targets.py"`
Add merge module for demographic features and target 2020-04-16 20:20:16 +02:00
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule merge_features_and_targets_for_individual_model:`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00			`input:`
Update example workflow and docs 2021-11-02 20:26:01 +01:00			`cleaned_sensor_features = "data/processed/features/{pid}/all_sensor_features_cleaned_rapids.csv",`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`targets = "data/processed/targets/{pid}/parsed_targets.csv",`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/individual_model/{pid}/input.csv"`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/models/workflow_example/merge_features_and_targets_for_individual_model.py"`
Refactor select_days_to_analyse, fix merge bugs, add clean metrics for model 2020-03-18 02:15:53 +01:00
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`rule merge_features_and_targets_for_population_model:`
Add one rule to calculate the ratio of cells with missing values for cleaned features 2020-05-16 00:25:07 +02:00			`input:`
Update example workflow and docs 2021-11-02 20:26:01 +01:00			`cleaned_sensor_features = "data/processed/features/all_participants/all_sensor_features_cleaned_rapids.csv",`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`demographic_features = expand("data/processed/features/{pid}/demographic_features.csv", pid=config["PIDS"]),`
			`targets = expand("data/processed/targets/{pid}/parsed_targets.csv", pid=config["PIDS"]),`
Add one rule to calculate the ratio of cells with missing values for cleaned features 2020-05-16 00:25:07 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/population_model/input.csv"`
Add one rule to calculate the ratio of cells with missing values for cleaned features 2020-05-16 00:25:07 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/models/workflow_example/merge_features_and_targets_for_population_model.py"`

			`rule baselines_for_individual_model:`
Add modeling module 2020-04-30 00:53:54 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/individual_model/{pid}/input.csv"`
Split modeling module into two rules; Add RandomOverSampler for resampling; Add log; Fix bug of AUC 2020-05-16 00:42:03 +02:00			`params:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`cv_method = "{cv_method}",`
			`colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],`
Split modeling module into two rules; Add RandomOverSampler for resampling; Add log; Fix bug of AUC 2020-05-16 00:42:03 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv"`
			`log:`
			`"data/processed/models/individual_model/{pid}/output_{cv_method}/baselines_notes.log"`
Split modeling module into two rules; Add RandomOverSampler for resampling; Add log; Fix bug of AUC 2020-05-16 00:42:03 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/models/workflow_example/baselines.py"`

			`rule baselines_for_population_model:`
Add baseline 2020-05-16 00:45:45 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/population_model/input.csv"`
Add baseline 2020-05-16 00:45:45 +02:00			`params:`
			`cv_method = "{cv_method}",`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`colnames_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["FEATURES"],`
Add baseline 2020-05-16 00:45:45 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/population_model/output_{cv_method}/baselines.csv"`
Add baseline 2020-05-16 00:45:45 +02:00			`log:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/population_model/output_{cv_method}/baselines_notes.log"`
Add baseline 2020-05-16 00:45:45 +02:00			`script:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"../src/models/workflow_example/baselines.py"`

Rename modeling.py to modelling.py & Update example_config.yaml 2020-11-26 04:35:38 +01:00			`rule modelling_for_individual_participants:`
Split modeling module into two rules; Add RandomOverSampler for resampling; Add log; Fix bug of AUC 2020-05-16 00:42:03 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`data = "data/processed/models/individual_model/{pid}/input.csv"`
Add modeling module 2020-04-30 00:53:54 +02:00			`params:`
			`model = "{model}",`
			`cv_method = "{cv_method}",`
			`scaler = "{scaler}",`
			`categorical_operators = config["PARAMS_FOR_ANALYSIS"]["CATEGORICAL_OPERATORS"],`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`categorical_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CATEGORICAL_FEATURES"],`
Add modeling module 2020-04-30 00:53:54 +02:00			`model_hyperparams = config["PARAMS_FOR_ANALYSIS"]["MODEL_HYPERPARAMS"],`
			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`fold_predictions = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_predictions.csv",`
			`fold_metrics = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_metrics.csv",`
			`overall_results = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/overall_results.csv",`
			`fold_feature_importances = "data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/fold_feature_importances.csv"`
Split modeling module into two rules; Add RandomOverSampler for resampling; Add log; Fix bug of AUC 2020-05-16 00:42:03 +02:00			`log:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`"data/processed/models/individual_model/{pid}/output_{cv_method}/{model}/{scaler}/notes.log"`
Add modeling module 2020-04-30 00:53:54 +02:00			`script:`
Rename modeling.py to modelling.py & Update example_config.yaml 2020-11-26 04:35:38 +01:00			`"../src/models/workflow_example/modelling.py"`
Add the rule to merge population model results 2020-05-16 00:49:14 +02:00
Rename modeling.py to modelling.py & Update example_config.yaml 2020-11-26 04:35:38 +01:00			`rule modelling_for_all_participants:`
Add the rule to merge population model results 2020-05-16 00:49:14 +02:00			`input:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`data = "data/processed/models/population_model/input.csv"`
			`params:`
			`model = "{model}",`
			`cv_method = "{cv_method}",`
			`scaler = "{scaler}",`
			`categorical_operators = config["PARAMS_FOR_ANALYSIS"]["CATEGORICAL_OPERATORS"],`
			`categorical_demographic_features = config["PARAMS_FOR_ANALYSIS"]["DEMOGRAPHIC"]["CATEGORICAL_FEATURES"],`
			`model_hyperparams = config["PARAMS_FOR_ANALYSIS"]["MODEL_HYPERPARAMS"],`
Add the rule to merge population model results 2020-05-16 00:49:14 +02:00			`output:`
Add analysis example workflow 2020-11-25 22:34:05 +01:00			`fold_predictions = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_predictions.csv",`
			`fold_metrics = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_metrics.csv",`
			`overall_results = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/overall_results.csv",`
			`fold_feature_importances = "data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/fold_feature_importances.csv"`
			`log:`
			`"data/processed/models/population_model/output_{cv_method}/{model}/{scaler}/notes.log"`
Add the rule to merge population model results 2020-05-16 00:49:14 +02:00			`script:`
Rename modeling.py to modelling.py & Update example_config.yaml 2020-11-26 04:35:38 +01:00			`"../src/models/workflow_example/modelling.py"`