diff --git a/Snakefile b/Snakefile index d849a3f4..e98061fe 100644 --- a/Snakefile +++ b/Snakefile @@ -44,6 +44,9 @@ rule all: expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv", pid=config["PIDS"], fitbit_sensor=config["FITBIT_SENSORS"]), + expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", + pid = config["PIDS"], + day_segment = config["STEP"]["DAY_SEGMENTS"]), # Reports expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]), expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]), diff --git a/config.yaml b/config.yaml index 013af9fc..377afd85 100644 --- a/config.yaml +++ b/config.yaml @@ -91,3 +91,11 @@ LIGHT: ACCELEROMETER: DAY_SEGMENTS: *day_segments METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"] + +STEP: + DAY_SEGMENTS: *day_segments + METRICS: + ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"] + SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout"] + ACTIVE_BOUT: ["countactivebout", "maxdurationactivebout", "mindurationactivebout", "avgdurationactivebout", "stddurationactivebout"] + THRESHOLD_ACTIVE_BOUT: 10 # steps \ No newline at end of file diff --git a/rules/features.snakefile b/rules/features.snakefile index c40e3cbc..b10d8ace 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -130,3 +130,17 @@ rule accelerometer_metrics: "data/processed/{pid}/accelerometer_{day_segment}.csv" script: "../src/features/accelerometer_metrics.py" + +rule fitbit_step_metrics: + input: + steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv", + params: + day_segment 
= "{day_segment}", + metrics_all_steps = config["STEP"]["METRICS"]["ALL_STEPS"], + metrics_sedentary_bout = config["STEP"]["METRICS"]["SEDENTARY_BOUT"], + metrics_active_bout = config["STEP"]["METRICS"]["ACTIVE_BOUT"], + threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"] + output: + "data/processed/{pid}/fitbit_step_{day_segment}.csv" + script: + "../src/features/fitbit_step_metrics.py" diff --git a/src/features/fitbit_step_metrics.py b/src/features/fitbit_step_metrics.py new file mode 100644 index 00000000..f436b53d --- /dev/null +++ b/src/features/fitbit_step_metrics.py @@ -0,0 +1,97 @@ +import pandas as pd +import numpy as np +import datetime as dt +from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes + +day_segment = snakemake.params["day_segment"] +all_steps = snakemake.params["metrics_all_steps"] +sedentary_bout = snakemake.params["metrics_sedentary_bout"] +active_bout = snakemake.params["metrics_active_bout"] +threshold_active_bout = snakemake.params['threshold_active_bout'] + +#Read the steps csv into a pandas dataframe, parsing local_date_time as datetimes +data = pd.read_csv(snakemake.input['steps_data'],parse_dates=['local_date_time']) +columns = list("step_" + str(day_segment) + "_" + column for column in (all_steps + sedentary_bout + active_bout)) + +if (day_segment != 'daily'): + data = data.loc[data['local_day_segment'] == str(day_segment)] + +if data.empty: + finalDataset = pd.DataFrame(columns = columns) +else: + finalDataset = pd.DataFrame() + + #Preprocessing: index the rows by local_date_time and compute the whole minutes (truncated to int) elapsed since the previous row + data.local_date_time = pd.to_datetime(data.local_date_time) + resampledData = data.set_index(data.local_date_time) + resampledData.index.names = ['datetime'] + + resampledData['time_diff_minutes'] = resampledData['local_date_time'].diff().fillna(pd.Timedelta(seconds=0)).dt.total_seconds().div(60).astype(int) + + #Sedentary Bout when you have less than 10 steps in a minute (10 is the THRESHOLD_ACTIVE_BOUT value set in config.yaml; NOTE(review): confirm the comparison uses threshold_active_bout rather than a literal 10) + #Active Bout when you have greater or equal to 10 steps in a minute (same configurable threshold) + resampledData['active_sedentary'] = 
np.where(resampledData['steps']