From 7c240a9613e6ffd1f825722aad455c8b9e2058fe Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Tue, 17 Mar 2020 17:26:30 -0400
Subject: [PATCH] Select days to analyse

Co-authored-by: JulioV
---
 Snakefile                            | 11 ++++++++-
 config.yaml                          |  5 +++-
 rules/mystudy.snakefile              | 12 ++++++++++
 src/models/select_days_to_analyse.py | 49 ++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 rules/mystudy.snakefile
 create mode 100644 src/models/select_days_to_analyse.py

diff --git a/Snakefile b/Snakefile
index 9797b5ea..0037cdfc 100644
--- a/Snakefile
+++ b/Snakefile
@@ -4,9 +4,18 @@ include: "rules/preprocessing.snakefile"
 include: "rules/features.snakefile"
 include: "rules/models.snakefile"
 include: "rules/reports.snakefile"
+include: "rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project
 
 rule all:
     input:
+        # My study (this is an example of a rule created specifically for a study)
+        expand("data/interim/{pid}/days_to_analyse_{days_before_surgery}_{days_in_hospital}_{days_after_discharge}.csv",
+                            pid=config["PIDS"],
+                            days_before_surgery = config["METRICS_FOR_ANALYSIS"]["DAYS_BEFORE_SURGERY"],
+                            days_after_discharge= config["METRICS_FOR_ANALYSIS"]["DAYS_AFTER_DISCHARGE"],
+                            days_in_hospital= config["METRICS_FOR_ANALYSIS"]["DAYS_IN_HOSPITAL"]),
+
+        # Feature extraction
         expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["FITBIT_TABLE"]),
         expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
@@ -64,7 +73,7 @@ rule all:
         expand("data/processed/metrics_for_population_model/{source}_{day_segment}.csv",
                             source = config["METRICS_FOR_ANALYSIS"]["SOURCES"],
                             day_segment = config["METRICS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
-        # Reports
+        # Visualizations
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
         expand("reports/figures/{pid}/battery_consumption_rates_barchart.html", pid=config["PIDS"]),
diff --git a/config.yaml b/config.yaml
index 98d97d40..5a1b5227 100644
--- a/config.yaml
+++ b/config.yaml
@@ -130,4 +130,7 @@ METRICS_FOR_ANALYSIS:
   PHONE_METRICS: [accelerometer, applications_foreground, battery, call_incoming, call_missed, call_outgoing, google_activity_recognition, light, location_barnett, screen, sms_received, sms_sent]
   FITBIT_METRICS: [fitbit_heartrate, fitbit_step]
   PHONE_FITBIT_METRICS: "" # This array is merged in the input_merge_features_of_single_participant function in models.snakefile
-  DROP_VALID_SENSED_DAYS: True
\ No newline at end of file
+  DROP_VALID_SENSED_DAYS: True
+  DAYS_BEFORE_SURGERY: 15
+  DAYS_AFTER_DISCHARGE: 7
+  DAYS_IN_HOSPITAL: F # "T" keeps the days from surgery to discharge; any other value drops them
diff --git a/rules/mystudy.snakefile b/rules/mystudy.snakefile
new file mode 100644
index 00000000..5e9aeec9
--- /dev/null
+++ b/rules/mystudy.snakefile
@@ -0,0 +1,12 @@
+rule days_to_analyse:
+    input:
+        participant_info = "data/external/participant_info.csv",
+        pid_file = "data/external/{pid}"
+    params:
+        days_before_surgery = config["METRICS_FOR_ANALYSIS"]["DAYS_BEFORE_SURGERY"],
+        days_after_discharge = config["METRICS_FOR_ANALYSIS"]["DAYS_AFTER_DISCHARGE"],
+        days_in_hospital= config["METRICS_FOR_ANALYSIS"]["DAYS_IN_HOSPITAL"]
+    output:
+        "data/interim/{pid}/days_to_analyse_{days_before_surgery}_{days_in_hospital}_{days_after_discharge}.csv"
+    script:
+        "../src/models/select_days_to_analyse.py"
diff --git a/src/models/select_days_to_analyse.py b/src/models/select_days_to_analyse.py
new file mode 100644
index 00000000..bcb543e4
--- /dev/null
+++ b/src/models/select_days_to_analyse.py
@@ -0,0 +1,49 @@
+import warnings
+from datetime import timedelta
+
+import pandas as pd
+
+
+def appendDaysInRange(days_to_analyse, start_date, end_date):
+    """Return days_to_analyse extended with each date from start_date to end_date (inclusive).
+
+    The input frame is returned unchanged when end_date is earlier than start_date.
+    """
+    num_of_days = (end_date - start_date).days
+    if num_of_days < 0:
+        return days_to_analyse
+    # Build the new rows in one go: appending row by row copies the frame on
+    # every iteration, and DataFrame.append was removed in pandas 2.0.
+    new_days = [start_date + timedelta(days=day) for day in range(num_of_days + 1)]
+    return pd.concat([days_to_analyse, pd.DataFrame({"days_to_analyse": new_days})], ignore_index=True)
+
+
+days_before_surgery = snakemake.params["days_before_surgery"]
+days_in_hospital = snakemake.params["days_in_hospital"]
+days_after_discharge = snakemake.params["days_after_discharge"]
+
+participant_info = pd.read_csv(snakemake.input["participant_info"], parse_dates=["surgery_date", "discharge_date"])
+# The first line of the pid file lists the participant's device ids, comma separated
+with open(snakemake.input["pid_file"], encoding="ISO-8859-1") as external_file:
+    device_ids = external_file.readline().strip().split(",")
+
+days_to_analyse = pd.DataFrame(columns=["days_to_analyse"])
+participant_info = participant_info[participant_info["device_id"].isin(device_ids)]
+try:
+    surgery_date = participant_info["surgery_date"].iloc[0].date()
+    discharge_date = participant_info["discharge_date"].iloc[0].date()
+except (IndexError, ValueError, AttributeError) as error:
+    # IndexError: no row matches these device ids. ValueError/AttributeError: a date
+    # is missing or was not parsed. Still write an empty file, but report the reason.
+    warnings.warn("No days to analyse for {}: {!r}".format(snakemake.input["pid_file"], error))
+else:
+    start_date = surgery_date - timedelta(days=days_before_surgery)
+    end_date = discharge_date + timedelta(days=days_after_discharge)
+
+    days_to_analyse = appendDaysInRange(days_to_analyse, start_date, surgery_date - timedelta(days=1))
+    # Days spent in hospital (surgery to discharge, inclusive) are kept only when requested
+    if days_in_hospital == "T":
+        days_to_analyse = appendDaysInRange(days_to_analyse, surgery_date, discharge_date)
+    days_to_analyse = appendDaysInRange(days_to_analyse, discharge_date + timedelta(days=1), end_date)
+
+days_to_analyse.to_csv(snakemake.output[0], index=False)