rapids/src/data/fitbit_parse_calories.py

46 lines
1.9 KiB
Python
Raw Normal View History

import json
import pandas as pd
from datetime import datetime
# Column order of every intraday row produced by parseCaloriesData.
CALORIES_INTRADAY_COLUMNS = (
    "device_id",
    "level", "mets", "value",
    "local_date_time", "timestamp",
)


def parseCaloriesData(calories_data):
    """Flatten raw Fitbit calories JSON into summary and intraday tables.

    Args:
        calories_data: DataFrame with a ``device_id`` column and a
            ``fitbit_data`` column whose cells are JSON strings. Each JSON
            document is read through its ``activities-calories`` list (the
            first entry's ``dateTime`` gives the day) and its
            ``activities-calories-intraday`` -> ``dataset`` list (entries
            with ``time``, ``level``, ``mets`` and ``value``).

    Returns:
        A ``(summary, intraday)`` tuple of DataFrames. ``summary`` is always
        empty and only carries a ``local_date_time`` column. ``intraday`` has
        the columns listed in ``CALORIES_INTRADAY_COLUMNS``; its
        ``timestamp`` column is filled with the placeholder ``0``.
    """
    if calories_data.empty:
        # Keep the same schema as the non-empty path so that callers can
        # index summary["local_date_time"] without a KeyError.
        return (
            pd.DataFrame(data=[], columns=["local_date_time"]),
            pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS),
        )

    # All rows are labelled with the device id of the first input row.
    device_id = calories_data["device_id"].iloc[0]
    records_intraday = []

    # Parse each JSON document into individual intraday records.
    for raw_record in calories_data.fitbit_data:
        record = json.loads(raw_record)
        curr_date = datetime.strptime(
            record["activities-calories"][0]["dateTime"], "%Y-%m-%d")
        for data in record["activities-calories-intraday"]["dataset"]:
            d_time = datetime.strptime(data["time"], "%H:%M:%S").time()
            d_datetime = datetime.combine(curr_date, d_time)
            records_intraday.append((
                device_id,
                data["level"], data["mets"], data["value"],
                d_datetime, 0,  # 0 is a placeholder timestamp
            ))

    return (
        pd.DataFrame(data=[], columns=["local_date_time"]),
        pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS),
    )
# Script entry point: `snakemake` is injected by Snakemake when this file is
# run through a rule's `script:` directive.
table_format = snakemake.params["table_format"]

if table_format == "JSON":
    # A single CSV whose rows hold raw Fitbit JSON documents.
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseCaloriesData(json_raw)
elif table_format == "CSV":
    # Two already-tabular CSV files: summary first, intraday second.
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])
else:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError(
        "Unsupported table_format {!r}; expected 'JSON' or 'CSV'".format(table_format))

# Derive the timestamp as milliseconds elapsed since 1970-01-01 from the naive
# local date-time. read_csv yields strings, so convert to datetimes before
# doing the arithmetic; the local_date_time column itself is left untouched.
for frame in (summary, intraday):
    local_date_time = pd.to_datetime(frame["local_date_time"])
    frame["timestamp"] = (
        (local_date_time - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s") * 1000)

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)