diff --git a/Snakefile b/Snakefile index f7873960..115fb029 100644 --- a/Snakefile +++ b/Snakefile @@ -1,9 +1,10 @@ configfile: "config.yaml" -include: "rules/renv.snakefile" -include: "rules/preprocessing.snakefile" -include: "rules/features.snakefile" -include: "rules/models.snakefile" -include: "rules/reports.snakefile" +include: "rules/common.smk" +include: "rules/renv.smk" +include: "rules/preprocessing.smk" +include: "rules/features.smk" +include: "rules/models.smk" +include: "rules/reports.smk" import itertools diff --git a/rules/common.smk b/rules/common.smk new file mode 100644 index 00000000..bebbac44 --- /dev/null +++ b/rules/common.smk @@ -0,0 +1,113 @@ +# Common.smk ########################################################################################################## + +def infer_participant_platform(participant_file): + with open(participant_file, encoding="ISO-8859-1") as external_file: + external_file_content = external_file.readlines() + platforms = external_file_content[1].strip().split(",") + if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms): + platform = "android" + else: + platform = platforms[0] + + if platform not in ["android", "ios"]: + raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. 
You typed '" + str(platforms) + "'") + + return platform + +# Preprocessing.smk #################################################################################################### + +def optional_phone_sensed_bins_input(wildcards): + platform = infer_participant_platform("data/external/"+wildcards.pid) + + if platform == "android": + tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist + elif platform == "ios": + tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist + + return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform) + +# Features.smk ######################################################################################################### + +def optional_ar_input(wildcards): + platform = infer_participant_platform("data/external/"+wildcards.pid) + + if platform == "android": + return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv", + "data/processed/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_deltas.csv"] + elif platform == "ios": + return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv", + "data/processed/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_deltas.csv"] + +def optional_conversation_input(wildcards): + platform = infer_participant_platform("data/external/"+wildcards.pid) + + if platform == "android": + return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv"] + elif platform == "ios": + return ["data/raw/{pid}/" + 
config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime_unified.csv"] + +def optional_location_barnett_input(wildcards): + if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED": + return expand("data/raw/{{pid}}/{sensor}_resampled.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) + else: + return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) + +def optional_location_doryab_input(wildcards): + if config["DORYAB_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED": + return expand("data/raw/{{pid}}/{sensor}_resampled.csv", sensor=config["DORYAB_LOCATION"]["DB_TABLE"]) + else: + return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["DORYAB_LOCATION"]["DB_TABLE"]) + +def optional_steps_sleep_input(wildcards): + if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": + return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv" + else: + return [] + +def optional_wifi_input(wildcards): + if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) == 0: + return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])} + elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) == 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0: + return {"connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])} + elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0: + return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]), "connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", 
sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])} + else: + raise ValueError("If you are computing WIFI features you need to provide either VISIBLE_ACCESS_POINTS, CONNECTED_ACCESS_POINTS or both") + + +# Models.smk ########################################################################################################### + +def input_merge_features_of_single_participant(wildcards): + if wildcards.source == "phone_fitbit_features": + return expand("data/processed/{pid}/{features}_{day_segment}.csv", pid=wildcards.pid, features=config["PARAMS_FOR_ANALYSIS"]["PHONE_FEATURES"] + config["PARAMS_FOR_ANALYSIS"]["FITBIT_FEATURES"], day_segment=wildcards.day_segment) + else: + return expand("data/processed/{pid}/{features}_{day_segment}.csv", pid=wildcards.pid, features=config["PARAMS_FOR_ANALYSIS"][wildcards.source.upper()], day_segment=wildcards.day_segment) + +def optional_input_days_to_include(wildcards): + if config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["ENABLED"]: + # This input automatically triggers the rule days_to_analyse in mystudy.snakefile + return ["data/interim/{pid}/days_to_analyse" + \ + "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_BEFORE_SURGERY"]) + \ + "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_IN_HOSPITAL"]) + \ + "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_AFTER_DISCHARGE"]) + ".csv"] + else: + return [] + +def optional_input_valid_sensed_days(wildcards): + if config["PARAMS_FOR_ANALYSIS"]["DROP_VALID_SENSED_DAYS"]["ENABLED"]: + # This input automatically triggers the rule phone_valid_sensed_days in preprocessing.smk + return ["data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"] + else: + return [] + +# Reports.smk ########################################################################################################### + +def optional_heatmap_days_by_sensors_input(wildcards): + platform = 
infer_participant_platform("data/external/"+wildcards.pid) + + if platform == "android": + tables_platform = [table for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist + elif platform == "ios": + tables_platform = [table for table in config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist + + return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform) diff --git a/rules/features.snakefile b/rules/features.smk similarity index 71% rename from rules/features.snakefile rename to rules/features.smk index e4e6e78b..4a55059d 100644 --- a/rules/features.snakefile +++ b/rules/features.smk @@ -1,63 +1,3 @@ -def infer_participant_platform(participant_file): - with open(participant_file, encoding="ISO-8859-1") as external_file: - external_file_content = external_file.readlines() - platforms = external_file_content[1].strip().split(",") - if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms): - platform = "android" - else: - platform = platforms[0] - return platform - -def optional_ar_input(wildcards): - platform = infer_participant_platform("data/external/"+wildcards.pid) - - if platform == "android": - return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv", - "data/processed/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_deltas.csv"] - elif platform == "ios": - return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv", - "data/processed/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_deltas.csv"] - else: - raise ValueError("Platform 
(line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platforms + "'") - -def optional_conversation_input(wildcards): - platform = infer_participant_platform("data/external/"+wildcards.pid) - - if platform == "android": - return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv"] - elif platform == "ios": - return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime_unified.csv"] - else: - raise ValueError("Platform (line 2) in a participant file should be 'android' or 'ios', or 'multiple'. You typed '" + platforms + "'") - -def optional_location_input(wildcards): - if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED": - return expand("data/raw/{{pid}}/{sensor}_resampled.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) - else: - return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"]) - -def optional_location_doryab_input(wildcards): - if config["DORYAB_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED": - return expand("data/raw/{{pid}}/{sensor}_resampled.csv", sensor=config["DORYAB_LOCATION"]["DB_TABLE"]) - else: - return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["DORYAB_LOCATION"]["DB_TABLE"]) - -def optional_steps_sleep_input(wildcards): - if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": - return "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv" - else: - return [] - -def optional_wifi_input(wildcards): - if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) == 0: - return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])} - elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) == 0 and 
len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0: - return {"connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])} - elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0: - return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]), "connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])} - else: - raise ValueError("If you are computing WIFI features you need to provide either VISIBLE_ACCESS_POINTS, CONNECTED_ACCESS_POINTS or both") - rule messages_features: input: expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]) @@ -116,7 +56,7 @@ rule ios_activity_recognition_deltas: rule location_barnett_features: input: - locations = optional_location_input + locations = optional_location_barnett_input params: features = config["BARNETT_LOCATION"]["FEATURES"], locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"], diff --git a/rules/models.snakefile b/rules/models.smk similarity index 88% rename from rules/models.snakefile rename to rules/models.smk index 7187e4ba..287fdf35 100644 --- a/rules/models.snakefile +++ b/rules/models.smk @@ -36,29 +36,6 @@ rule demographic_features: script: "../src/features/demographic_features.py" -def input_merge_features_of_single_participant(wildcards): - if wildcards.source == "phone_fitbit_features": - return expand("data/processed/{pid}/{features}_{day_segment}.csv", pid=wildcards.pid, features=config["PARAMS_FOR_ANALYSIS"]["PHONE_FEATURES"] + config["PARAMS_FOR_ANALYSIS"]["FITBIT_FEATURES"], day_segment=wildcards.day_segment) - else: - return expand("data/processed/{pid}/{features}_{day_segment}.csv", pid=wildcards.pid, 
features=config["PARAMS_FOR_ANALYSIS"][wildcards.source.upper()], day_segment=wildcards.day_segment) - -def optional_input_days_to_include(wildcards): - if config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["ENABLED"]: - # This input automatically trigers the rule days_to_analyse in mystudy.snakefile - return ["data/interim/{pid}/days_to_analyse" + \ - "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_BEFORE_SURGERY"]) + \ - "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_IN_HOSPITAL"]) + \ - "_" + str(config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_AFTER_DISCHARGE"]) + ".csv"] - else: - return [] - -def optional_input_valid_sensed_days(wildcards): - if config["PARAMS_FOR_ANALYSIS"]["DROP_VALID_SENSED_DAYS"]["ENABLED"]: - # This input automatically trigers the rule phone_valid_sensed_days in preprocessing.snakefile - return ["data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"] - else: - return [] - rule merge_features_for_individual_model: input: feature_files = input_merge_features_of_single_participant, diff --git a/rules/packrat.snakefile b/rules/packrat.snakefile deleted file mode 100644 index bd46cccc..00000000 --- a/rules/packrat.snakefile +++ /dev/null @@ -1,22 +0,0 @@ -# --- Packrat Rules --- # -## Taken from https://github.com/lachlandeer/snakemake-econ-r - -## packrat_install: installs packrat onto machine -rule packrat_install: - shell: - "R -e 'install.packages(\"packrat\", repos=\"http://cran.us.r-project.org\")'" - -## packrat_install: initialize a packrat environment for this project -rule packrat_init: - shell: - "R -e 'packrat::init()'" - -## packrat_snap : Look for new R packages in files & archives them -rule packrat_snap: - shell: - "R -e 'packrat::snapshot()'" - -## packrat_restore: Installs archived packages onto a new machine -rule packrat_restore: - shell: - "R -e 'packrat::restore()'" \ No newline at end of file diff --git 
a/rules/preprocessing.snakefile b/rules/preprocessing.smk similarity index 84% rename from rules/preprocessing.snakefile rename to rules/preprocessing.smk index cfa26138..fdcc4f3f 100644 --- a/rules/preprocessing.snakefile +++ b/rules/preprocessing.smk @@ -1,22 +1,3 @@ -def optional_phone_sensed_bins_input(wildcards): - with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file: - external_file_content = external_file.readlines() - platforms = external_file_content[1].strip().split(",") - if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms): - platform = "android" - else: - platform = platforms[0] - - if platform not in ["android", "ios"]: - raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platforms + "'") - - if platform == "android": - tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist - elif platform == "ios": - tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist - - return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform) - rule restore_sql_file: input: sql_file = "data/external/rapids_example.sql", diff --git a/rules/renv.snakefile b/rules/renv.smk similarity index 100% rename from rules/renv.snakefile rename to rules/renv.smk diff --git a/rules/reports.snakefile b/rules/reports.smk similarity index 63% rename from rules/reports.snakefile rename to rules/reports.smk index c746d770..13064a02 100644 --- a/rules/reports.snakefile +++ b/rules/reports.smk @@ -1,27 +1,3 @@ -def 
optional_heatmap_days_by_sensors_input(wildcards): - with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file: - external_file_content = external_file.readlines() - platforms = external_file_content[1].strip().split(",") - if platforms[0] == "multiple" or (len(platforms) > 1 and "android" in platforms and "ios" in platforms): - platform = "android" - else: - platform = platforms[0] - - if platform not in ["android", "ios"]: - raise ValueError("Platform (line 2) in a participant file should be 'android', 'ios', or 'multiple'. You typed '" + platforms + "'") - - input_for_heatmap_days_by_sensors, tables = [], config["HEATMAP_DAYS_BY_SENSORS"]["DB_TABLES"] - - for sensor in ["ACTIVITY_RECOGNITION", "CONVERSATION"]: - table = config[sensor]["DB_TABLE"][platform.upper()] - if table in tables: - input_for_heatmap_days_by_sensors.append("data/raw/{pid}/" + table + "_with_datetime.csv") - tables = [x for x in tables if x not in config[sensor]["DB_TABLE"].values()] - for table in tables: - input_for_heatmap_days_by_sensors.append("data/raw/{pid}/" + table + "_with_datetime.csv") - - return input_for_heatmap_days_by_sensors - rule heatmap_features_correlations: input: features = expand("data/processed/{pid}/{sensor}_{day_segment}.csv", pid=config["PIDS"], sensor=config["HEATMAP_FEATURES_CORRELATIONS"]["PHONE_FEATURES"]+config["HEATMAP_FEATURES_CORRELATIONS"]["FITBIT_FEATURES"], day_segment=config["DAY_SEGMENTS"]), @@ -98,36 +74,3 @@ rule overall_compliance_heatmap: "reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html" script: "../src/visualization/overall_compliance_heatmap.py" - -# rule heatmap_rows: -# input: -# sensor = "data/raw/{pid}/{sensor}_with_datetime.csv", -# pid_file = "data/external/{pid}" -# params: -# table = "{sensor}", -# pid = "{pid}", -# bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] -# output: -# "reports/figures/{pid}/{sensor}_heatmap_rows.html" -# 
script: -# "../src/visualization/heatmap_rows.py" - -# rule battery_consumption_rates_barchart: -# input: -# sensor = "data/processed/{pid}/battery_daily.csv", -# pid_file = "data/external/{pid}" -# params: -# pid = "{pid}" -# output: -# "reports/figures/{pid}/battery_consumption_rates_barchart.html" -# script: -# "../src/visualization/battery_consumption_rates_barchart.py" - -# rule compliance_report: -# input: -# sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS), -# compliance_heatmap = rules.compliance_heatmap.output -# output: -# "reports/compliance/{pid}/compliance_report.html", -# script: -# "../src/visualization/compliance_report.Rmd" \ No newline at end of file diff --git a/tests/scripts/run_tests.sh b/tests/scripts/run_tests.sh index 6c1872a2..e96c2f8a 100755 --- a/tests/scripts/run_tests.sh +++ b/tests/scripts/run_tests.sh @@ -19,7 +19,7 @@ cp tests/data/external/* data/external # cp rules/preprocessing.snakefile bak echo Disabling downloading of dataset... -sed -e '46,58 s/^/#/' -e 's/rules.download_dataset.output/"data\/raw\/\{pid\}\/\{sensor\}_raw\.csv"/' rules/preprocessing.snakefile > tmp +sed -e '27,39 s/^/#/' -e 's/rules.download_dataset.output/"data\/raw\/\{pid\}\/\{sensor\}_raw\.csv"/' rules/preprocessing.smk > tmp -cp tmp rules/preprocessing.snakefile +cp tmp rules/preprocessing.smk echo Running RAPIDS Pipeline on testdata...