import pandas as pd import numpy as np import datetime as dt from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes day_segment = snakemake.params["day_segment"] all_steps = snakemake.params["metrics_all_steps"] sedentary_bout = snakemake.params["metrics_sedentary_bout"] active_bout = snakemake.params["metrics_active_bout"] threshold_active_bout = snakemake.params['threshold_active_bout'] #Read csv into a pandas dataframe data = pd.read_csv(snakemake.input['steps_data'],parse_dates=['local_date_time']) columns = list("step_" + str(day_segment) + "_" + column for column in (all_steps + sedentary_bout + active_bout)) if (day_segment != 'daily'): data = data.loc[data['local_day_segment'] == str(day_segment)] if data.empty: finalDataset = pd.DataFrame(columns = columns) else: finalDataset = pd.DataFrame() #Preprocessing: data.local_date_time = pd.to_datetime(data.local_date_time) resampledData = data.set_index(data.local_date_time) resampledData.index.names = ['datetime'] resampledData['time_diff_minutes'] = resampledData['local_date_time'].diff().fillna(pd.Timedelta(seconds=0)).dt.total_seconds().div(60).astype(int) #Sedentary Bout when you have less than 10 steps in a minute #Active Bout when you have greater or equal to 10 steps in a minute resampledData['active_sedentary'] = np.where(resampledData['steps']