diff --git a/config.yaml b/config.yaml index 11465b42..60bdae5f 100644 --- a/config.yaml +++ b/config.yaml @@ -122,6 +122,7 @@ STEP: SEDENTARY_BOUT: ["countsedentarybout", "maxdurationsedentarybout", "mindurationsedentarybout", "avgdurationsedentarybout", "stddurationsedentarybout"] ACTIVE_BOUT: ["countactivebout", "maxdurationactivebout", "mindurationactivebout", "avgdurationactivebout", "stddurationactivebout"] THRESHOLD_ACTIVE_BOUT: 10 # steps + INCLUDE_ZERO_STEP_ROWS: True METRICS_FOR_ANALYSIS: SOURCES: &sources ["phone_metrics", "fitbit_metrics", "phone_fitbit_metrics"] diff --git a/rules/features.snakefile b/rules/features.snakefile index f295a686..ced22596 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -168,7 +168,8 @@ rule fitbit_step_metrics: metrics_all_steps = config["STEP"]["METRICS"]["ALL_STEPS"], metrics_sedentary_bout = config["STEP"]["METRICS"]["SEDENTARY_BOUT"], metrics_active_bout = config["STEP"]["METRICS"]["ACTIVE_BOUT"], - threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"] + threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"], + include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"] output: "data/processed/{pid}/fitbit_step_{day_segment}.csv" script: diff --git a/src/features/fitbit_step_metrics.py b/src/features/fitbit_step_metrics.py index f436b53d..17a356e3 100644 --- a/src/features/fitbit_step_metrics.py +++ b/src/features/fitbit_step_metrics.py @@ -8,6 +8,7 @@ all_steps = snakemake.params["metrics_all_steps"] sedentary_bout = snakemake.params["metrics_sedentary_bout"] active_bout = snakemake.params["metrics_active_bout"] threshold_active_bout = snakemake.params['threshold_active_bout'] +include_zero_step_rows = snakemake.params["include_zero_step_rows"] #Read csv into a pandas dataframe data = pd.read_csv(snakemake.input['steps_data'],parse_dates=['local_date_time']) @@ -92,6 +93,12 @@ else: if("stddurationactivebout" in active_bout): finalDataset["step_" + str(day_segment) + "_stddurationactivebout"] = statsMinutes[statsMinutes['active_sedentary']== 'active']['std'] + + #Exclude data when the total step count is ZERO during the whole epoch + if not include_zero_step_rows: + finalDataset["sumallsteps_aux"] = resampledData["steps"].resample("D").sum() + finalDataset = finalDataset.query("sumallsteps_aux != 0") + del finalDataset["sumallsteps_aux"] finalDataset.index.names = ['local_date'] finalDataset.to_csv(snakemake.output[0])