rapids/src/data/fitbit_steps_intraday_exclu...

48 lines
2.7 KiB
Python

import pandas as pd
from datetime import timedelta
# Drop Fitbit intraday step rows that fall inside "sleep" intervals, then
# write the remaining rows to the rule's first output.
#
# Two exclusion sources are configurable under params["exclude_sleep"]:
#   TIME_BASED   - a fixed daily window (START_TIME .. END_TIME).
#   FITBIT_BASED - the intervals listed in the sleep_data input.
# When both are enabled and sleep intervals exist, a day's fixed window is
# used only if it does not overlap any sleep interval.
#
# All interval bounds are exclusive on the "keep" side: a row is kept only if
# it is strictly before the start or strictly after the end of an interval.
# Rows are filtered with boolean masks rather than generated query() strings,
# so the filter does not grow into one huge expression (one clause per sleep
# episode) and CSV values are never spliced into expression source.

exclude_sleep = snakemake.params["exclude_sleep"]
exclude_time_based = exclude_sleep["TIME_BASED"]["EXCLUDE"]
exclude_fitbit_based = exclude_sleep["FITBIT_BASED"]["EXCLUDE"]
# NOTE(review): ":00" is appended, presumably turning "HH:MM" into "HH:MM:SS"
# so it compares against local_time / local_date_time values - confirm format.
exclude_sleep_fixed_start = exclude_sleep["TIME_BASED"]["START_TIME"] + ":00"
exclude_sleep_fixed_end = exclude_sleep["TIME_BASED"]["END_TIME"] + ":00"
# start >= end is treated as a window that crosses midnight (ends next day).
fixed_window_crosses_midnight = exclude_sleep_fixed_start >= exclude_sleep_fixed_end

steps_intraday_data = pd.read_csv(snakemake.input["sensor_data"], parse_dates=["local_date"])
if snakemake.input["sleep_data"]:
    # Sleep rows missing either bound cannot define an interval; discard them.
    sleep_data = pd.read_csv(snakemake.input["sleep_data"]).dropna(
        subset=["local_start_date_time", "local_end_date_time"], how="any")
else:
    sleep_data = pd.DataFrame()

if not steps_intraday_data.empty:
    if exclude_time_based and (not exclude_fitbit_based or sleep_data.empty):
        # Only the fixed daily window applies (Fitbit-based exclusion is off,
        # or there are no sleep intervals to use).
        local_time = steps_intraday_data["local_time"]
        before_start = local_time < exclude_sleep_fixed_start
        after_end = local_time > exclude_sleep_fixed_end
        if fixed_window_crosses_midnight:
            # Awake time is the single stretch between end and start.
            keep_rows = before_start & after_end
        else:
            # Awake time is everything outside start..end within the day.
            keep_rows = before_start | after_end
        steps_intraday_data = steps_intraday_data[keep_rows]

    elif exclude_fitbit_based and not sleep_data.empty:
        sleep_intervals = list(zip(sleep_data["local_start_date_time"],
                                   sleep_data["local_end_date_time"]))
        excluded_intervals = []

        if exclude_time_based:
            # One fixed window per day, starting the day before the first
            # local_date in the data through the last local_date.
            fixed_start_dates = pd.date_range(
                steps_intraday_data["local_date"].min() - timedelta(days=1),
                steps_intraday_data["local_date"].max())
            if fixed_window_crosses_midnight:
                fixed_end_dates = fixed_start_dates + timedelta(days=1)
            else:
                fixed_end_dates = fixed_start_dates
            fixed_starts = fixed_start_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_start
            fixed_ends = fixed_end_dates.strftime("%Y-%m-%d") + " " + exclude_sleep_fixed_end

            # Keep only the fixed windows that overlap no sleep interval.
            excluded_intervals = [
                (fixed_start, fixed_end)
                for fixed_start, fixed_end in zip(fixed_starts, fixed_ends)
                if not any(fixed_start < sleep_end and fixed_end > sleep_start
                           for sleep_start, sleep_end in sleep_intervals)
            ]

        # Sleep intervals are always excluded in this branch.
        excluded_intervals = excluded_intervals + sleep_intervals

        # A row survives only if it lies outside every excluded interval.
        local_date_time = steps_intraday_data["local_date_time"]
        keep_rows = pd.Series(True, index=steps_intraday_data.index)
        for interval_start, interval_end in excluded_intervals:
            keep_rows &= (local_date_time < interval_start) | (local_date_time > interval_end)
        steps_intraday_data = steps_intraday_data[keep_rows]

steps_intraday_data.to_csv(snakemake.output[0], index=False)