Add Fitbit steps feature extraction
parent
db2f3dc50f
commit
d82fa0d217
|
@ -44,6 +44,9 @@ rule all:
|
|||
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
||||
pid=config["PIDS"],
|
||||
fitbit_sensor=config["FITBIT_SENSORS"]),
|
||||
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
|
||||
pid = config["PIDS"],
|
||||
day_segment = config["STEP"]["DAY_SEGMENTS"]),
|
||||
# Reports
|
||||
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
|
||||
|
|
|
@ -91,3 +91,11 @@ LIGHT:
|
|||
ACCELEROMETER:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
||||
|
||||
# Fitbit step-count features.
# The rule fitbit_step_metrics reads every key below through config["STEP"].
STEP:
  DAY_SEGMENTS: *day_segments
  METRICS:
    # Daily statistics over the raw step counts.
    ALL_STEPS:
      - "sumallsteps"
      - "maxallsteps"
      - "minallsteps"
      - "avgallsteps"
      - "stdallsteps"
    # Statistics over bouts whose samples are below THRESHOLD_ACTIVE_BOUT.
    SEDENTARY_BOUT:
      - "countsedentarybout"
      - "maxdurationsedentarybout"
      - "mindurationsedentarybout"
      - "avgdurationsedentarybout"
      - "stddurationsedentarybout"
    # Statistics over bouts whose samples reach THRESHOLD_ACTIVE_BOUT.
    ACTIVE_BOUT:
      - "countactivebout"
      - "maxdurationactivebout"
      - "mindurationactivebout"
      - "avgdurationactivebout"
      - "stddurationactivebout"
  THRESHOLD_ACTIVE_BOUT: 10 # steps
|
|
@ -130,3 +130,17 @@ rule accelerometer_metrics:
|
|||
"data/processed/{pid}/accelerometer_{day_segment}.csv"
|
||||
script:
|
||||
"../src/features/accelerometer_metrics.py"
|
||||
|
||||
# Compute the Fitbit step features for one participant ({pid}) and one day
# segment ({day_segment}); the feature lists and the active-bout threshold are
# taken from the STEP section of the config.
rule fitbit_step_metrics:
    input:
        steps_data = "data/raw/{pid}/fitbit_steps_with_datetime.csv",
    output:
        "data/processed/{pid}/fitbit_step_{day_segment}.csv"
    params:
        # The wildcard is forwarded so the script knows which segment to keep.
        day_segment = "{day_segment}",
        metrics_all_steps = config["STEP"]["METRICS"]["ALL_STEPS"],
        metrics_sedentary_bout = config["STEP"]["METRICS"]["SEDENTARY_BOUT"],
        metrics_active_bout = config["STEP"]["METRICS"]["ACTIVE_BOUT"],
        threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"]
    script:
        "../src/features/fitbit_step_metrics.py"
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import datetime as dt
|
||||
from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
|
||||
|
||||
# Fitbit step feature extraction (executed by Snakemake's `script:` directive).
#
# Reads the per-participant steps CSV, optionally keeps only the rows of one
# day segment, labels every sample as "sedentary" or "active" by comparing its
# step count with a threshold, groups consecutive samples with the same label
# into bouts, and writes one row of features per local date.
#
# Inputs  : snakemake.input["steps_data"]  - CSV with at least the columns
#           local_date_time, local_date, local_day_segment and steps.
# Params  : day_segment, metrics_all_steps, metrics_sedentary_bout,
#           metrics_active_bout, threshold_active_bout.
# Output  : snakemake.output[0]            - CSV indexed by local_date.

day_segment = snakemake.params["day_segment"]
all_steps = snakemake.params["metrics_all_steps"]
sedentary_bout = snakemake.params["metrics_sedentary_bout"]
active_bout = snakemake.params["metrics_active_bout"]
threshold_active_bout = snakemake.params["threshold_active_bout"]

# Every output column is named step_<day_segment>_<feature>.
prefix = "step_" + str(day_segment) + "_"

# Read csv into a pandas dataframe
data = pd.read_csv(snakemake.input["steps_data"], parse_dates=["local_date_time"])
columns = [prefix + column for column in (all_steps + sedentary_bout + active_bout)]

if day_segment != "daily":
    data = data.loc[data["local_day_segment"] == str(day_segment)]

if data.empty:
    # No samples for this segment: emit a header-only file.
    finalDataset = pd.DataFrame(columns=columns)
else:
    # Work on a fresh frame (the segment filter above may return a slice) and
    # make both date columns real datetimes. local_date is read from the CSV
    # as text; converting it lets the per-date bout statistics line up with
    # the daily DatetimeIndex produced by resample("D") below.
    # NOTE(review): assumes local_date holds parseable calendar dates - confirm.
    data = data.assign(
        local_date_time=pd.to_datetime(data["local_date_time"]),
        local_date=pd.to_datetime(data["local_date"]),
    )

    # Preprocessing:
    resampledData = data.set_index("local_date_time", drop=False)
    resampledData.index.names = ["datetime"]

    # Whole minutes elapsed since the previous sample (0 for the first one).
    # NOTE(review): after filtering to a single day segment, the first sample
    # of each day inherits the gap since the previous day's last sample, which
    # inflates that bout's duration - behaviour kept as-is, please confirm.
    resampledData["time_diff_minutes"] = (
        resampledData["local_date_time"]
        .diff()
        .fillna(pd.Timedelta(seconds=0))
        .dt.total_seconds()
        .div(60)
        .astype(int)
    )

    # Sedentary when a sample has fewer steps than the threshold,
    # active when it has at least as many.
    resampledData["active_sedentary"] = np.where(
        resampledData["steps"] < int(threshold_active_bout), "sedentary", "active"
    )

    # Consecutive samples sharing a label belong to the same bout: the id
    # increases by one every time the label changes.
    labels = resampledData["active_sedentary"]
    resampledData["active_sedentary_groups"] = (labels != labels.shift()).cumsum().values

    # Total minutes of every bout (a bout crossing midnight is split by date).
    bout_minutes = resampledData.groupby(
        ["local_date", "active_sedentary", "active_sedentary_groups"]
    )["time_diff_minutes"].sum()

    steps_daily = resampledData["steps"].resample("D")
    daily_index = steps_daily.sum().index

    # Per-date statistics over the bout durations, one column per
    # (statistic, label). Both labels are always present so a label that never
    # occurs yields NaN durations / zero counts instead of a lookup failure.
    bout_stat_names = ["max", "min", "mean", "std", "count"]
    bout_stats = (
        bout_minutes.groupby(level=["local_date", "active_sedentary"])
        .agg(bout_stat_names)
        .unstack("active_sedentary")
        .reindex(columns=pd.MultiIndex.from_product([bout_stat_names, ["active", "sedentary"]]))
        .reindex(daily_index)
    )

    # Fixing the index up front keeps the set of output dates independent of
    # which features are enabled.
    finalDataset = pd.DataFrame(index=daily_index)

    # Descriptive statistics of the raw step counts per day.
    for feature, how in [
        ("sumallsteps", "sum"),
        ("maxallsteps", "max"),
        ("minallsteps", "min"),
        ("avgallsteps", "mean"),
        ("stdallsteps", "std"),
    ]:
        if feature in all_steps:
            finalDataset[prefix + feature] = steps_daily.agg(how)

    # Number of bouts per day. The count is taken over bouts, not over the
    # individual samples that make them up; days without any bout get 0.
    for feature, requested, label in [
        ("countsedentarybout", sedentary_bout, "sedentary"),
        ("countactivebout", active_bout, "active"),
    ]:
        if feature in requested:
            finalDataset[prefix + feature] = bout_stats[("count", label)].fillna(0).astype(int)

    # Duration statistics (in minutes) of the bouts of each kind.
    for label, requested in [("sedentary", sedentary_bout), ("active", active_bout)]:
        for name, stat in [("max", "max"), ("min", "min"), ("avg", "mean"), ("std", "std")]:
            feature = name + "duration" + label + "bout"
            if feature in requested:
                finalDataset[prefix + feature] = bout_stats[(stat, label)]

finalDataset.index.names = ["local_date"]
finalDataset.to_csv(snakemake.output[0])
|
Loading…
Reference in New Issue