diff --git a/Snakefile b/Snakefile index 1b60f3d9..48e9b30e 100644 --- a/Snakefile +++ b/Snakefile @@ -34,7 +34,7 @@ rule all: day_segment = config["SCREEN"]["DAY_SEGMENTS"]), # Reports expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), - expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]), expand("reports/figures/{pid}/battery_consumption_rates_barchart.html", pid=config["PIDS"]), # --- Packrat Rules --- # diff --git a/rules/reports.snakefile b/rules/reports.snakefile index ad045b4b..d5c8802c 100644 --- a/rules/reports.snakefile +++ b/rules/reports.snakefile @@ -11,7 +11,7 @@ rule heatmap_rows: rule compliance_heatmap: input: - expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"]) + "data/interim/{pid}/phone_sensed_bins.csv" params: pid = "{pid}" output: diff --git a/src/visualization/compliance_heatmap.py b/src/visualization/compliance_heatmap.py index b693667b..413947bb 100644 --- a/src/visualization/compliance_heatmap.py +++ b/src/visualization/compliance_heatmap.py @@ -4,12 +4,12 @@ import plotly.io as pio import plotly.graph_objects as go import datetime -def getComplianceMatrix(dates, compliance_bins): +def getDatesComplianceMatrix(phone_sensed_bins): + dates = phone_sensed_bins.index compliance_matrix = [] for date in dates: - date_bins = compliance_bins[compliance_bins["local_date"] == date] - compliance_matrix.append(date_bins["has_row"].tolist()) - return compliance_matrix + compliance_matrix.append(phone_sensed_bins.loc[date, :].tolist()) + return dates, compliance_matrix def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): bins_per_hour = int(60 / bin_size) @@ -25,51 +25,16 @@ def getComplianceHeatmap(dates, compliance_matrix, pid, output_path, bin_size): # get current patient id pid = snakemake.params["pid"] -sensors_dates = [] -sensors_five_minutes_row_is = pd.DataFrame() -for sensor_path in snakemake.input: - sensor_data = pd.read_csv(sensor_path) - - # check if the sensor is off - if sensor_data.empty: - continue +phone_sensed_bins = pd.read_csv(snakemake.input[0], parse_dates=["local_date"], index_col="local_date") - # create a dataframe contains 2 columns: local_date_time, has_row - sensor_data["has_row"] = [1]*sensor_data.shape[0] - sensor_data["local_date_time"] = pd.to_datetime(sensor_data["local_date_time"]) - sensed_bins = sensor_data[["local_date_time", "has_row"]] - - # get the first date and the last date of current sensor - start_date = datetime.datetime.combine(sensed_bins["local_date_time"][0].date(), datetime.time(0,0,0)) - end_date = datetime.datetime.combine(sensed_bins["local_date_time"][sensed_bins.shape[0]-1].date(), datetime.time(23,59,59)) - - # add the above datetime with has_row=0 to our dataframe - sensed_bins.loc[sensed_bins.shape[0], :] = [start_date, 0] - sensed_bins.loc[sensed_bins.shape[0], :] = [end_date, 0] - # get bins with 5 min - sensor_five_minutes_row_is = pd.DataFrame(sensed_bins.resample("5T", on="local_date_time")["has_row"].sum()) - sensor_five_minutes_row_is["has_row"] = (sensor_five_minutes_row_is["has_row"]>0).astype(int) - # merge current sensor with previous sensors - if sensors_five_minutes_row_is.empty: - sensors_five_minutes_row_is = sensor_five_minutes_row_is - else: - sensors_five_minutes_row_is = pd.concat([sensors_five_minutes_row_is, sensor_five_minutes_row_is]).groupby("local_date_time").sum() - -if sensors_five_minutes_row_is.empty: +if phone_sensed_bins.empty: empty_html = open(snakemake.output[0], "w") empty_html.write("There is no sensor data for " + pid) empty_html.close() else: - sensors_five_minutes_row_is.reset_index(inplace=True) - # resample again to impute missing dates - sensors_five_minutes_row_is_successive = pd.DataFrame(sensors_five_minutes_row_is.resample("5T", on="local_date_time")["has_row"].sum()) - - # get sorted date list - sensors_five_minutes_row_is_successive.reset_index(inplace=True) - sensors_five_minutes_row_is_successive["local_date"] = sensors_five_minutes_row_is_successive["local_date_time"].apply(lambda x: x.date()) - dates = list(set(sensors_five_minutes_row_is_successive["local_date"])) - dates.sort() - compliance_matrix = getComplianceMatrix(dates, sensors_five_minutes_row_is_successive) - + # resample to impute missing dates + phone_sensed_bins = phone_sensed_bins.resample("1D").asfreq().fillna(0) + # get dates and compliance_matrix + dates, compliance_matrix = getDatesComplianceMatrix(phone_sensed_bins) # get heatmap getComplianceHeatmap(dates, compliance_matrix, pid, snakemake.output[0], 5) \ No newline at end of file