Implement parse fitbit data
parent
e0cd360c6d
commit
cff83a7ceb
41 Snakefile
@@ -144,29 +144,42 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
        files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))

if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
    raise ValueError("config['FITBIT_HEARTRATE']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_HEARTRATE"]["TABLE_FORMAT"])

if config["FITBIT_STEPS"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
    raise ValueError("config['FITBIT_STEPS']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_STEPS"]["TABLE_FORMAT"])

if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
    raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_CALORIES"]["TABLE_FORMAT"])

if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
    raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_SLEEP"]["TABLE_FORMAT"])

for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
    if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_raw.csv", pid=config["PIDS"]))
        # files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
        # files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))

for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
    if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_raw.csv", pid=config["PIDS"]))
        # if config["STEP"]["COMPUTE"]:
        # if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
        # files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
        # files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["TABLE"]))
        # files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
        # files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))

for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
    if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]:
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))

for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
    if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_raw.csv", pid=config["PIDS"]))
        # files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
        # files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))

        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))

# visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
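For readers skimming the Snakefile changes: `expand` builds the cross product of the wildcard values, so the conditional `fitbit_data_type` list switches each sensor between one JSON target per participant and a summary/intraday pair. A minimal sketch of what it yields (participant ids made up):

# Minimal sketch of the conditional expand() above; pids are made up.
from snakemake.io import expand

pids = ["p01", "p02"]
table_format = "JSON"  # flip to "CSV" to get summary/intraday targets
targets = expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv",
                 pid=pids,
                 fitbit_data_type=(["json"] if table_format == "JSON" else ["summary", "intraday"]))
print(targets)
# ['data/raw/p01/fitbit_heartrate_json_raw.csv', 'data/raw/p02/fitbit_heartrate_json_raw.csv']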
57 config.yaml
@@ -15,14 +15,15 @@ TIMEZONE: &timezone
DATABASE_GROUP: &database_group
    MY_GROUP

# config section for the script that creates participant files automatically
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
    PHONE_SECTION:
        INCLUDE: TRUE
        ADD: TRUE
        PARSED_FROM: AWARE_DEVICE_TABLE # AWARE_DEVICE_TABLE or CSV_FILE
        PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
        IGNORED_DEVICE_IDS: []
    FITBIT_SECTION:
        INCLUDE: FALSE
        ADD: FALSE
        SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
        PARSED_FROM: CSV_FILE
        PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
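For concreteness, here is a hypothetical example of the participants file the FITBIT_SECTION above points at, using the columns the comment lists (only device_id is required; all values are made up):

# Hypothetical external/my_fitbit_participants.csv matching the documented columns; values made up.
import pandas as pd

participants = pd.DataFrame({
    "device_id": ["fitbit01", "fitbit02"],
    "pid": ["p01", "p02"],
    "label": ["pilot_a", "pilot_b"],
    "start_date": ["2020-09-01", "2020-09-01"],
    "end_date": ["2020-10-01", "2020-10-01"],
})
print(participants.to_csv(index=False))  # save this output as external/my_fitbit_participants.csv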
@@ -241,28 +242,40 @@ PHONE_CONVERSATION:
    SRC_FOLDER: "rapids" # inside src/features/phone_conversation
    SRC_LANGUAGE: "python"

############## FITBIT ##########################################################
################################################################################

FITBIT_HEARTRATE:
    TABLE: "fitbit_data"
    PARSE_JSON: TRUE
    TABLE_FORMAT: JSON # JSON or CSV
    TABLE:
        JSON: fitbit_heartrate
        CSV:
            SUMMARY: heartrate_summary.csv
            INTRADAY: heartrate_intraday.csv
    PROVIDERS:
        RAPIDS:
            COMPUTE: True
            COMPUTE: False
            SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depends on the accuracy of the participant's fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
            INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]


FITBIT_STEPS:
    TABLE: fitbit_data
    PARSE_JSON: TRUE
    EXCLUDE_SLEEP:
    TABLE_FORMAT: JSON # JSON or CSV
    TABLE:
        JSON: fitbit_steps
        CSV:
            SUMMARY: steps_summary.csv
            INTRADAY: steps_intraday.csv
    EXCLUDE_SLEEP: # you can exclude sleep periods from the step features computation
        EXCLUDE: False
        TYPE: FIXED # FIXED or FITBIT_BASED (configure FITBIT_SLEEP section)
        FIXED:
            START: "23:00"
            END: "07:00"

    PROVIDERS:
        RAPIDS:
            COMPUTE: TRUE
            COMPUTE: False
            FEATURES:
                ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
                SEDENTARY_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
@@ -271,15 +284,33 @@ FITBIT_STEPS:
            INCLUDE_ZERO_STEP_ROWS: False

FITBIT_SLEEP:
    TABLE: fitbit_data
    PARSE_JSON: TRUE
    TABLE_FORMAT: JSON # JSON or CSV
    TABLE:
        JSON: fitbit_sleep
        CSV:
            SUMMARY: sleep_summary.csv
            INTRADAY: sleep_intraday.csv
    PROVIDERS:
        RAPIDS:
            COMPUTE: TRUE
            COMPUTE: False
            SLEEP_TYPES: ["main", "nap", "all"]
            SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]

### Visualizations ################################################################
FITBIT_CALORIES:
    TABLE_FORMAT: JSON # JSON or CSV
    TABLE:
        JSON: fitbit_calories
        CSV:
            SUMMARY: calories_summary.csv
            INTRADAY: calories_intraday.csv
    PROVIDERS:
        RAPIDS:
            COMPUTE: False
            FEATURES: []

### Visualizations #############################################################
################################################################################

HEATMAP_FEATURES_CORRELATIONS:
    PLOT: False
    MIN_ROWS_RATIO: 0.5
@@ -44,9 +44,10 @@ rule download_fitbit_data:
    params:
        source = config["SENSOR_DATA"]["FITBIT"]["SOURCE"],
        sensor = "fitbit_" + "{sensor}",
        type = "{fitbit_data_type}",
        table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
    output:
        "data/raw/{pid}/fitbit_{sensor}_raw.csv"
        "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_raw.csv"
    script:
        "../src/data/download_fitbit_data.R"
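Because TABLE is now a per-format mapping rather than a plain table name, the `table` lambda above hands the download script the whole mapping for the matched sensor. A standalone sketch of that lookup under the new config shape (config trimmed to one sensor; the Wildcards class is a stand-in for snakemake's wildcards object):

# Standalone sketch of the `table` lambda's lookup under the new TABLE mapping.
config = {
    "FITBIT_HEARTRATE": {
        "TABLE_FORMAT": "JSON",
        "TABLE": {
            "JSON": "fitbit_heartrate",
            "CSV": {"SUMMARY": "heartrate_summary.csv", "INTRADAY": "heartrate_intraday.csv"},
        },
    },
}

class Wildcards:  # stand-in for snakemake's wildcards object
    sensor = "heartrate"

table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"]
print(table(Wildcards()))  # {'JSON': 'fitbit_heartrate', 'CSV': {...}}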
@@ -179,37 +180,63 @@ rule phone_application_categories:
    script:
        "../src/data/application_categories.R"

# rule fitbit_heartrate_with_datetime:
#     input:
#         expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["TABLE"])
#     params:
#         local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
#         fitbit_sensor = "heartrate"
#     output:
#         summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
#         intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
#     script:
#         "../src/data/fitbit_readable_datetime.py"
rule fitbit_parse_heartrate:
    input:
        data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
    params:
        table = config["FITBIT_HEARTRATE"]["TABLE"],
        table_format = config["FITBIT_HEARTRATE"]["TABLE_FORMAT"]
    output:
        summary_data = "data/raw/{pid}/fitbit_heartrate_summary_parsed.csv",
        intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv"
    script:
        "../src/data/fitbit_parse_heartrate.py"

# rule fitbit_step_with_datetime:
#     input:
#         expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["TABLE"])
#     params:
#         local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
#         fitbit_sensor = "steps"
#     output:
#         intraday_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
#     script:
#         "../src/data/fitbit_readable_datetime.py"
rule fitbit_parse_steps:
    input:
        data = expand("data/raw/{{pid}}/fitbit_steps_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
    params:
        table = config["FITBIT_STEPS"]["TABLE"],
        table_format = config["FITBIT_STEPS"]["TABLE_FORMAT"]
    output:
        summary_data = "data/raw/{pid}/fitbit_steps_summary_parsed.csv",
        intraday_data = "data/raw/{pid}/fitbit_steps_intraday_parsed.csv"
    script:
        "../src/data/fitbit_parse_steps.py"

# rule fitbit_sleep_with_datetime:
#     input:
#         expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["TABLE"])
#     params:
#         local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
#         fitbit_sensor = "sleep"
#     output:
#         summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
#         intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
#     script:
#         "../src/data/fitbit_readable_datetime.py"
rule fitbit_parse_calories:
    input:
        data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
    params:
        table = config["FITBIT_CALORIES"]["TABLE"],
        table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"]
    output:
        summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv",
        intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv"
    script:
        "../src/data/fitbit_parse_calories.py"

rule fitbit_parse_sleep:
    input:
        data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
    params:
        table = config["FITBIT_SLEEP"]["TABLE"],
        table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
    output:
        summary_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_episodes.csv",
        intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
    script:
        "../src/data/fitbit_parse_sleep.py"

rule fitbit_readable_datetime:
    input:
        sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",
        day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
    params:
        fixed_timezone = "UTC",
        day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed_with_datetime.csv"
    script:
        "../src/data/readable_datetime.R"
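One consequence of the conditional inputs worth noting: each parse script receives either a single JSON dump or a summary/intraday CSV pair, which is why the scripts below index `snakemake.input[0]` and `snakemake.input[1]` by position. An illustrative sketch of the shapes (paths illustrative, mirroring the conditional expand() above):

# Illustrative only: the input list a parse rule receives for each TABLE_FORMAT.
def raw_inputs(pid, sensor, table_format):
    data_types = ["json"] if table_format == "JSON" else ["summary", "intraday"]
    return [f"data/raw/{pid}/fitbit_{sensor}_{t}_raw.csv" for t in data_types]

print(raw_inputs("p01", "heartrate", "JSON"))  # one file  -> input[0] only
print(raw_inputs("p01", "heartrate", "CSV"))   # two files -> input[0], input[1]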
@@ -5,11 +5,9 @@ from datetime import datetime

CALORIES_INTRADAY_COLUMNS = ("device_id",
                             "level", "mets", "value",
                             "local_date_time", "local_date", "local_month", "local_day",
                             "local_day_of_week", "local_time", "local_hour", "local_minute",
                             "local_day_segment")
                             "local_date_time", "timestamp")

def parseCaloriesData(calories_data, HOUR2EPOCH):
def parseCaloriesData(calories_data):
    if calories_data.empty:
        return pd.DataFrame(), pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)
    device_id = calories_data["device_id"].iloc[0]

@@ -26,10 +24,23 @@ def parseCaloriesData(calories_data, HOUR2EPOCH):

        row_intraday = (device_id,
                        data["level"], data["mets"], data["value"],
                        d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                        d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                        HOUR2EPOCH[d_datetime.hour])
                        d_datetime, 0)

        records_intraday.append(row_intraday)

    return pd.DataFrame(), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
    return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)

table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseCaloriesData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
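The epoch arithmetic above floors each `local_date_time` to whole seconds since 1970-01-01 and scales to milliseconds; a self-contained check of the idiom (dates made up). Note it assumes `local_date_time` is already a datetime column; in the CSV branch, `pd.read_csv` yields strings, so a `pd.to_datetime` pass would be needed before this subtraction.

# Self-contained check of the epoch-millisecond idiom used above; dates made up.
import pandas as pd

df = pd.DataFrame({"local_date_time": pd.to_datetime(["2020-10-01 00:00:00",
                                                      "2020-10-01 00:01:00"])})
df["timestamp"] = (df["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
print(df["timestamp"].tolist())  # [1601510400000, 1601510460000]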
@@ -1,10 +1,12 @@
import json
import json, sys
import pandas as pd
from datetime import datetime
from datetime import datetime, timezone
from math import trunc


HR_SUMMARY_COLUMNS = ("device_id",
                      "local_date",
                      "local_date_time",
                      "timestamp",
                      "heartrate_daily_restinghr",
                      "heartrate_daily_caloriesoutofrange",
                      "heartrate_daily_caloriesfatburn",

@@ -12,10 +14,10 @@ HR_SUMMARY_COLUMNS = ("device_id",
                      "heartrate_daily_caloriespeak")

HR_INTRADAY_COLUMNS = ("device_id",
                       "heartrate", "heartrate_zone",
                       "local_date_time", "local_date", "local_month", "local_day",
                       "local_day_of_week", "local_time", "local_hour", "local_minute",
                       "local_day_segment")
                       "heartrate",
                       "heartrate_zone",
                       "local_date_time",
                       "timestamp")

def parseHeartrateZones(heartrate_data):
    # Get the range of heartrate zones: outofrange, fatburn, cardio, peak

@@ -58,6 +60,7 @@ def parseHeartrateSummaryData(record_summary, device_id, curr_date):

    row_summary = (device_id,
                   curr_date,
                   0,
                   d_resting_heartrate,
                   d_calories_outofrange,
                   d_calories_fatburn,

@@ -68,7 +71,7 @@


def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH):
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
    for data in dataset:
        d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
        d_datetime = datetime.combine(curr_date, d_time)

@@ -83,15 +86,16 @@ def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date,

        row_intraday = (device_id,
                        d_hr, d_hrzone,
                        d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                        d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                        HOUR2EPOCH[d_datetime.hour])
                        d_datetime,
                        0)

        records_intraday.append(row_intraday)
    return records_intraday

# def append_timestamp(data):

def parseHeartrateData(heartrate_data, HOUR2EPOCH):
def parseHeartrateData(heartrate_data):
    if heartrate_data.empty:
        return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)
    device_id = heartrate_data["device_id"].iloc[0]

@@ -109,6 +113,21 @@ def parseHeartrateData(heartrate_data, HOUR2EPOCH):
        records_summary.append(row_summary)

        dataset = record["activities-heart-intraday"]["dataset"]
        records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH)
        records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range)

    return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)

table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseHeartrateData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
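For context, a sketch of the record shape the heart-rate parser walks. The payload below is made up; the "activities-heart-intraday" field name is taken from the code above, and the "activities-heart"/"dateTime" fields are assumed from the analogous steps parser:

# Made-up record illustrating the fields the parser above consumes.
import json
from datetime import datetime

record = json.loads('{"activities-heart": [{"dateTime": "2020-10-07"}], '
                    '"activities-heart-intraday": {"dataset": ['
                    '{"time": "00:00:00", "value": 68}, {"time": "00:01:00", "value": 67}]}}')

curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")
for data in record["activities-heart-intraday"]["dataset"]:
    d_time = datetime.strptime(data["time"], "%H:%M:%S").time()
    print(datetime.combine(curr_date, d_time), data["value"])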
@@ -1,35 +0,0 @@
import json
import pandas as pd
from datetime import datetime

STEPS_INTRADAY_COLUMNS = ("device_id",
                          "steps",
                          "local_date_time", "local_date", "local_month", "local_day",
                          "local_day_of_week", "local_time", "local_hour", "local_minute",
                          "local_day_segment")


def parseStepsData(steps_data, HOUR2EPOCH):
    if steps_data.empty:
        return pd.DataFrame(), pd.DataFrame(columns=STEPS_INTRADAY_COLUMNS)
    device_id = steps_data["device_id"].iloc[0]
    records_intraday = []
    # Parse JSON into individual records
    for record in steps_data.fitbit_data:
        record = json.loads(record)  # Parse text into JSON
        curr_date = datetime.strptime(
            record["activities-steps"][0]["dateTime"], "%Y-%m-%d")
        dataset = record["activities-steps-intraday"]["dataset"]
        for data in dataset:
            d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
            d_datetime = datetime.combine(curr_date, d_time)

            row_intraday = (device_id,
                            data["value"],
                            d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                            d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                            HOUR2EPOCH[d_datetime.hour])

            records_intraday.append(row_intraday)

    return pd.DataFrame(), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)
@@ -12,14 +12,13 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
                              "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
                              "is_main_sleep", "type",
                              "local_start_date_time", "local_end_date_time",
                              "local_start_date", "local_end_date",
                              "local_start_day_segment", "local_end_day_segment")
                              "start_timestamp", "end_timestamp")
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")

SLEEP_INTRADAY_COLUMNS = ("device_id",
                          # For "classic" type, original_level is one of {"awake", "restless", "asleep"}
                          # For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
                          "original_level",
                          "level",
                          # For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
                          # For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
                          "unified_level",

@@ -27,9 +26,8 @@ SLEEP_INTRADAY_COLUMNS = ("device_id",
                          "is_main_sleep",
                          # one of {"classic", "stages"}
                          "type",
                          "local_date_time", "local_date", "local_month", "local_day",
                          "local_day_of_week", "local_time", "local_hour", "local_minute",
                          "local_day_segment")
                          "local_date_time",
                          "timestamp")

def mergeLongAndShortData(data_summary):
    longData = pd.DataFrame(columns=['dateTime', 'level', 'seconds'])

@@ -76,7 +74,7 @@ def classicData1min(data_summary):
    # print(dataList)
    return dataList
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH):
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday):

    # Summary data
    sleep_record_type = "classic"

@@ -89,7 +87,7 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
                   d_is_main_sleep, sleep_record_type,
                   d_start_datetime, d_end_datetime,
                   d_start_datetime.date(), d_end_datetime.date(),
                   HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour],
                   0,0,
                   record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
                   record["restlessCount"], record["restlessDuration"])

@@ -111,23 +109,17 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
            # (1: "asleep", 2: "restless", 3: "awake")
            d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1]

            # unified_level summarises original_level (we came up with this classification)
            # 0 is awake, 1 is asleep
            # {"awake" + "restless"} are set to 0 and {"asleep"} is set to 1
            d_unified_level = 0 if d_original_level == "awake" or d_original_level == "restless" else 1

            row_intraday = (device_id,
                            d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                            d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                            d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                            HOUR2EPOCH[d_datetime.hour])
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)

            records_intraday.append(row_intraday)

    return records_summary, records_intraday

# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH):
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday):

    # Summary data
    sleep_record_type = record['type']

@@ -139,8 +131,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
                   record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
                   d_is_main_sleep, sleep_record_type,
                   d_start_datetime, d_end_datetime,
                   d_start_datetime.date(), d_end_datetime.date(),
                   HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour])
                   0,0)

    records_summary.append(row_summary)
    if sleep_record_type == 'classic':

@@ -160,13 +151,9 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re

            d_original_level = data["level"]

            d_unified_level = 0 if d_original_level == "awake" or d_original_level == "restless" else 1

            row_intraday = (device_id,
                            d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                            d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                            d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                            HOUR2EPOCH[d_datetime.hour])
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)
            records_intraday.append(row_intraday)
    else:
        ## for sleep type "stages"

@@ -185,13 +172,9 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re

            d_original_level = data[1]

            d_unified_level = 1 if d_original_level == "deep" or d_original_level == "light" or d_original_level == "rem" else 0

            row_intraday = (device_id,
                            d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type,
                            d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
                            d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
                            HOUR2EPOCH[d_datetime.hour])
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)

            records_intraday.append(row_intraday)

@@ -199,7 +182,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re


def parseSleepData(sleep_data, HOUR2EPOCH):
def parseSleepData(sleep_data):
    SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)

@@ -214,10 +197,29 @@ def parseSleepData(sleep_data, HOUR2EPOCH):
        # For sleep API version 1
        if "awakeCount" in record:
            SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
            records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH)
            records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday)
        # For sleep API version 1.2
        else:
            SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
            records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH)
            records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday)

    return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)

table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseSleepData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

summary["start_timestamp"] = (summary["local_start_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
summary["end_timestamp"] = (summary["local_end_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

# Unifying level
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
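Moving `unified_level` out of the per-row parsers into one vectorized `np.where` keeps the classic/stages mapping in a single place; a mini-demo using the level values documented in the column comments above:

# Mini-demo of the vectorized unified-level mapping used above; level values
# come from the column comments (classic: awake/restless/asleep, stages: wake/deep/light/rem).
import numpy as np
import pandas as pd

intraday = pd.DataFrame({"level": ["awake", "restless", "asleep", "wake", "deep", "light", "rem"]})
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)
print(intraday.to_string(index=False))  # 0 for awake/wake/restless, 1 otherwise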
@@ -0,0 +1,49 @@
import json
import pandas as pd
from datetime import datetime, timezone
from math import trunc

STEPS_INTRADAY_COLUMNS = ("device_id",
                          "steps",
                          "local_date_time",
                          "timestamp")


def parseStepsData(steps_data):
    if steps_data.empty:
        return pd.DataFrame(), pd.DataFrame(columns=STEPS_INTRADAY_COLUMNS)
    device_id = steps_data["device_id"].iloc[0]
    records_intraday = []
    # Parse JSON into individual records
    for record in steps_data.fitbit_data:
        record = json.loads(record)  # Parse text into JSON
        curr_date = datetime.strptime(
            record["activities-steps"][0]["dateTime"], "%Y-%m-%d")
        dataset = record["activities-steps-intraday"]["dataset"]
        for data in dataset:
            d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
            d_datetime = datetime.combine(curr_date, d_time)

            row_intraday = (device_id,
                            data["value"],
                            d_datetime,
                            0)

            records_intraday.append(row_intraday)

    return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)

table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseStepsData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
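A hedged usage sketch for the new steps parser: feed it a one-record frame shaped like the raw JSON dump. It assumes the parseStepsData definition above is in scope; the payload is made up, with field names as used in that code:

# Usage sketch for parseStepsData (defined above); the JSON payload is made up.
import pandas as pd

raw = pd.DataFrame({
    "device_id": ["fitbit01"],
    "fitbit_data": ['{"activities-steps": [{"dateTime": "2020-10-07", "value": "1200"}], '
                    '"activities-steps-intraday": {"dataset": [{"time": "08:00:00", "value": 35}]}}'],
})
summary, intraday = parseStepsData(raw)
print(intraday.iloc[0].tolist())  # ['fitbit01', 35, datetime(2020, 10, 7, 8, 0), 0]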