2020-05-15 23:51:00 +02:00
|
|
|
import json
|
2020-10-22 20:38:40 +02:00
|
|
|
import numpy as np
|
2020-05-15 23:51:00 +02:00
|
|
|
import pandas as pd
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
# Column layout of the intraday output CSV; order must match the tuples
# appended in parseCaloriesData below.
CALORIES_INTRADAY_COLUMNS = ("device_id",
                             "level", "mets", "value",
                             "local_date_time", "timestamp")

# Summary output has no parsed rows for calories; only the header is emitted.
CALORIES_SUMMARY_COLUMNS = ("local_date_time", "timestamp")


def parseCaloriesData(calories_data):
    """Parse raw Fitbit calories rows into (summary, intraday) dataframes.

    Parameters
    ----------
    calories_data : pd.DataFrame
        Raw rows with a "device_id" column and a "fitbit_data" column whose
        values are JSON strings (Fitbit "activities-calories" payloads,
        one day per row).

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        summary: always empty, with CALORIES_SUMMARY_COLUMNS headers.
        intraday: one row per entry of each day's
        "activities-calories-intraday" dataset, with
        CALORIES_INTRADAY_COLUMNS headers. The "timestamp" column is a 0
        placeholder; the epoch value is computed downstream once the
        timezone is known.
    """
    if calories_data.empty:
        # Fix: previously returned a column-less DataFrame for the summary,
        # which produced different CSV headers than the non-empty branch.
        return (pd.DataFrame(columns=list(CALORIES_SUMMARY_COLUMNS)),
                pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS))

    # All rows of one input file belong to a single device.
    device_id = calories_data["device_id"].iloc[0]

    records_intraday = []
    # Parse JSON into individual records
    for record in calories_data.fitbit_data:
        record = json.loads(record)  # Parse text into JSON

        # Calendar date this payload covers, e.g. "2020-05-01".
        curr_date = datetime.strptime(
            record["activities-calories"][0]["dateTime"], "%Y-%m-%d")

        dataset = record["activities-calories-intraday"]["dataset"]
        for data in dataset:
            # Each dataset entry carries only a time of day; combine it
            # with the payload's date to get a full local datetime.
            d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
            d_datetime = datetime.combine(curr_date, d_time)

            row_intraday = (device_id,
                            data["level"], data["mets"], data["value"],
                            d_datetime, 0)
            records_intraday.append(row_intraday)

    return (pd.DataFrame(columns=list(CALORIES_SUMMARY_COLUMNS)),
            pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS))
|
2020-10-22 19:08:52 +02:00
|
|
|
|
|
|
|
table_format = snakemake.params["table_format"]
|
2020-10-22 20:38:40 +02:00
|
|
|
timezone = snakemake.params["timezone"]
|
2020-10-22 19:08:52 +02:00
|
|
|
|
|
|
|
if table_format == "JSON":
|
|
|
|
json_raw = pd.read_csv(snakemake.input[0])
|
|
|
|
summary, intraday = parseCaloriesData(json_raw)
|
|
|
|
elif table_format == "CSV":
|
2020-10-26 22:17:53 +01:00
|
|
|
summary = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
|
|
|
intraday = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
2020-10-22 19:08:52 +02:00
|
|
|
|
2021-01-06 17:43:01 +01:00
|
|
|
# if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
|
|
|
|
|
2020-10-22 20:38:40 +02:00
|
|
|
if summary.shape[0] > 0:
|
2021-01-06 17:43:01 +01:00
|
|
|
summary["timestamp"] = summary["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
|
|
|
|
summary.dropna(subset=['timestamp'], inplace=True)
|
2020-10-22 20:38:40 +02:00
|
|
|
if intraday.shape[0] > 0:
|
2021-01-06 17:43:01 +01:00
|
|
|
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
|
|
|
|
intraday.dropna(subset=['timestamp'], inplace=True)
|
2020-10-22 19:08:52 +02:00
|
|
|
|
|
|
|
summary.to_csv(snakemake.output["summary_data"], index=False)
|
|
|
|
intraday.to_csv(snakemake.output["intraday_data"], index=False)
|