2020-01-29 22:22:53 +01:00
import pandas as pd
import numpy as np
2020-06-06 03:29:39 +02:00
from fitbit_step . fitbit_step_base import base_fitbit_step_features
2020-01-29 22:22:53 +01:00
2020-06-24 19:02:12 +02:00
# Load the step CSV handed in by Snakemake, parsing the two date columns as datetimes.
# NOTE(review): the key and column literals carry leading/trailing spaces as written —
# confirm they match the rule's input name and the CSV header.
date_columns = [" local_date_time ", " local_date "]
step_data = pd.read_csv(snakemake.input[" step_data "], parse_dates=date_columns)
2020-01-29 22:22:53 +01:00
# Day segment requested by the rule; forwarded unchanged to the feature extractor.
day_segment = snakemake.params[" day_segment "]
2020-06-06 03:29:39 +02:00
# Active-bout threshold from the rule's params; forwarded unchanged to the feature extractor.
threshold_active_bout = snakemake.params[" threshold_active_bout "]
2020-03-09 20:59:51 +01:00
# Zero-step-row setting from the rule's params; forwarded unchanged to the feature extractor.
include_zero_step_rows = snakemake.params[" include_zero_step_rows "]
2020-06-06 03:29:39 +02:00
# Seed frame that holds only the merge-key column; it has no rows yet.
step_features = pd.DataFrame(
    columns=[" local_date "],
)
2020-01-29 22:22:53 +01:00
2020-06-06 03:29:39 +02:00
# Collect the three requested feature lists from the rule's params, keyed by
# the same name they are stored under in `snakemake.params`.
# NOTE(review): the key literals carry leading/trailing spaces as written —
# confirm they match the param names declared in the rule.
requested_features = {
    feature_group: snakemake.params[feature_group]
    for feature_group in (
        " features_all_steps ",
        " features_sedentary_bout ",
        " features_active_bout ",
    )
}
2020-01-29 22:22:53 +01:00
2020-06-06 03:29:39 +02:00
# Compute the requested step features and join them onto the seed frame.
extracted_step_features = base_fitbit_step_features(
    step_data,
    day_segment,
    requested_features,
    threshold_active_bout,
    include_zero_step_rows,
)
# An outer join keeps every date present in either frame. The merge type must
# be exactly "outer": pandas rejects a value padded with spaces.
# NOTE(review): the `on` literal is kept as written so it matches the seed
# frame's column — confirm the extractor emits the same column name.
step_features = step_features.merge(
    extracted_step_features,
    on=" local_date ",
    how="outer",
)
2020-01-29 22:22:53 +01:00
2020-06-06 03:29:39 +02:00
# Sanity check: the output must have one column per requested feature plus
# one extra column (the "+ 1" in the message below).
requested_feature_count = np.sum([len(names) for names in requested_features.values()])
output_column_count = step_features.shape[1]
assert requested_feature_count + 1 == output_column_count, (
    " The number of features in the output dataframe (= "
    + str(output_column_count)
    + " ) does not match the expected value (= "
    + str(requested_feature_count)
    + " + 1). Verify your fitbit step feature extraction functions "
)
2020-01-29 22:22:53 +01:00
2020-06-06 03:29:39 +02:00
# Write the feature table to the rule's first output file, without the index column.
output_path = snakemake.output[0]
step_features.to_csv(output_path, index=False)