diff --git a/.gitignore b/.gitignore index dce3492d..688aea95 100644 --- a/.gitignore +++ b/.gitignore @@ -101,4 +101,6 @@ data/raw/* data/interim/* !/data/interim/.gitkeep data/processed/* -!/data/processed/.gitkeep \ No newline at end of file +!/data/processed/.gitkeep + +reports/ \ No newline at end of file diff --git a/Snakefile b/Snakefile index bd50a655..67d57ce7 100644 --- a/Snakefile +++ b/Snakefile @@ -1,6 +1,7 @@ configfile: "config.yaml" include: "rules/preprocessing.snakefile" include: "rules/features.snakefile" +include: "rules/reports.snakefile" rule all: input: @@ -11,6 +12,7 @@ rule all: sms_type = config["COM_SMS"]["SMS_TYPES"], day_segment = config["COM_SMS"]["DAY_SEGMENTS"], metric = config["COM_SMS"]["METRICS"]), + expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), # --- Packrat Rules --- # ## Taken from https://github.com/lachlandeer/snakemake-econ-r diff --git a/config.yaml b/config.yaml index a2cc57c9..9f50cbb0 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ # Valid database table names -SENSORS: [messages] +SENSORS: [messages, calls] # Participants to include in the analysis # You must create a file for each participant diff --git a/requirements.txt b/requirements.txt index 8db59e27..50494a1f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,15 +18,23 @@ Jinja2==2.10.3 jinja2-time==0.2.0 jsonschema==3.1.1 MarkupSafe==1.1.1 +mkl-fft==1.0.14 +mkl-random==1.1.0 +mkl-service==2.3.0 more-itertools==7.2.0 +numpy==1.17.2 +pandas==0.25.2 pbr==5.4.3 +plotly==4.2.1 poyo==0.5.0 psutil==5.6.3 pyrsistent==0.15.4 python-dateutil==2.8.0 +pytz==2019.3 PyYAML==5.1.2 ratelimiter==1.2.0.post0 requests==2.22.0 +retrying==1.3.3 six==1.12.0 smmap2==2.0.5 snakemake==5.7.1 diff --git a/rules/reports.snakefile b/rules/reports.snakefile new file mode 100644 index 00000000..112d5302 --- /dev/null +++ b/rules/reports.snakefile @@ -0,0 +1,10 @@ +rule heatmap_rows: + input: + 
"""Render an hourly row-count heatmap for one participant and one sensor.

The script is driven by Snakemake: it reads the CSV at ``snakemake.input[0]``
(which must provide ``local_date`` and ``local_hour`` columns), counts the
rows recorded in every hour of every date, and writes an interactive HTML
heatmap to ``snakemake.output[0]``.
"""
import pandas as pd
import numpy as np

# Number of hour buckets (0-23) drawn on the heatmap's x axis.
HOURS_PER_DAY = 24


def getSortedDates(sensor_data):
    """Return the distinct, non-missing ``local_date`` values in ascending order.

    Missing dates are dropped before sorting: a NaN mixed in with date
    strings cannot be ordered and would make the sort raise ``TypeError``.

    Args:
        sensor_data: DataFrame with a ``local_date`` column.

    Returns:
        A sorted list of the unique dates found in the column.
    """
    return sorted(set(sensor_data["local_date"].dropna()))


def getHourlyRowCount(dates, sensor_data):
    """Count the rows of ``sensor_data`` per hour for each requested date.

    Args:
        dates: Iterable of date labels, compared against ``local_date``.
        sensor_data: DataFrame with ``local_date`` and ``local_hour`` columns.

    Returns:
        A list with one entry per date (same order as ``dates``); each entry
        is a list of 24 ints holding the row count for hours 0 through 23.
        Dates or hours without any rows are reported as 0.
    """
    if len(dates) == 0:
        return []

    # One pass over the frame instead of re-filtering it for every
    # (date, hour) pair; rows with a missing date or hour are ignored.
    counts = sensor_data.groupby(["local_date", "local_hour"]).size().to_dict()
    return [
        [int(counts.get((date, hour), 0)) for hour in range(HOURS_PER_DAY)]
        for date in dates
    ]


def getHourlyRowCountHeatmap(dates, hourly_row_count, sensor_name, pid, output_path):
    """Write the hourly row-count heatmap to ``output_path`` as an HTML file.

    Args:
        dates: Labels for the y axis, one per row of ``hourly_row_count``.
        hourly_row_count: Per-date lists of 24 hourly counts (the z values).
        sensor_name: Sensor name shown in the plot title.
        pid: Participant id shown in the plot title.
        output_path: Destination of the generated HTML file.
    """
    # Imported lazily so the counting helpers above can be imported (and
    # tested) in environments where plotly is not installed.
    import plotly.graph_objects as go
    import plotly.io as pio

    heatmap = go.Heatmap(
        z=hourly_row_count,
        x=list(range(HOURS_PER_DAY)),
        y=dates,
        colorscale='Viridis',
    )
    plot = go.Figure(data=heatmap)
    plot.update_layout(
        title=f"Hourly row count heatmap for {pid} for sensor {sensor_name}"
    )
    pio.write_html(plot, file=output_path, auto_open=False)


def main():
    """Build the heatmap described by the ``snakemake`` object of this job."""
    # ``snakemake`` is a global injected by Snakemake when it runs the script.
    sensor_data = pd.read_csv(snakemake.input[0])  # noqa: F821
    # get current sensor name
    sensor_name = snakemake.params["table"]  # noqa: F821
    # get current patient id
    pid = snakemake.params["pid"]  # noqa: F821
    # get sorted date list
    dates = getSortedDates(sensor_data)
    # get num of rows per hour per day
    hourly_row_count = getHourlyRowCount(dates, sensor_data)
    # get heatmap
    getHourlyRowCountHeatmap(
        dates, hourly_row_count, sensor_name, pid, snakemake.output[0]  # noqa: F821
    )


if __name__ == "__main__":
    main()