Implement parse fitbit data

pull/103/head
JulioV 2020-10-22 13:08:52 -04:00
parent e0cd360c6d
commit cff83a7ceb
8 changed files with 267 additions and 150 deletions

View File

@ -144,29 +144,42 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_HEARTRATE']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_HEARTRATE"]["TABLE_FORMAT"])
if config["FITBIT_STEPS"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_STEPS']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_STEPS"]["TABLE_FORMAT"])
if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_CALORIES']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_CALORIES"]["TABLE_FORMAT"])
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed " + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys(): for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys(): for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# if config["STEP"]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED": files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
# files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["TABLE"])) for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"])) if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]:
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys(): for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]: if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_episodes.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"])) files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
# visualization for data exploration # visualization for data exploration
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]: if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:

View File

@ -15,14 +15,15 @@ TIMEZONE: &timezone
DATABASE_GROUP: &database_group DATABASE_GROUP: &database_group
MY_GROUP MY_GROUP
# Config section for the script that creates participant files automatically
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
PHONE_SECTION: PHONE_SECTION:
INCLUDE: TRUE ADD: TRUE
PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
IGNORED_DEVICE_IDS: [] IGNORED_DEVICE_IDS: []
FITBIT_SECTION: FITBIT_SECTION:
INCLUDE: FALSE ADD: FALSE
SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
PARSED_FROM: CSV_FILE PARSED_FROM: CSV_FILE
PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional) PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
@ -241,28 +242,40 @@ PHONE_CONVERSATION:
SRC_FOLDER: "rapids" # inside src/features/phone_conversation SRC_FOLDER: "rapids" # inside src/features/phone_conversation
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"
############## FITBIT ##########################################################
################################################################################
FITBIT_HEARTRATE: FITBIT_HEARTRATE:
TABLE: "fitbit_data" TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: TRUE TABLE:
JSON: fitbit_heartrate
CSV:
SUMMARY: heartrate_summary.csv
INTRADAY: heartrate_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: False
SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. height, weight) use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"] SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. height, weight) use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"] INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
FITBIT_STEPS: FITBIT_STEPS:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: TRUE TABLE:
EXCLUDE_SLEEP: JSON: fitbit_steps
CSV:
SUMMARY: steps_summary.csv
INTRADAY: steps_intraday.csv
EXCLUDE_SLEEP: # you can exclude sleep periods from the step features computation
EXCLUDE: False EXCLUDE: False
TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section) TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section)
FIXED: FIXED:
START: "23:00" START: "23:00"
END: "07:00" END: "07:00"
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: TRUE COMPUTE: False
FEATURES: FEATURES:
ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"] ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
SEDENTARY_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"] SEDENTARY_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
@ -271,15 +284,33 @@ FITBIT_STEPS:
INCLUDE_ZERO_STEP_ROWS: False INCLUDE_ZERO_STEP_ROWS: False
FITBIT_SLEEP: FITBIT_SLEEP:
TABLE: fitbit_data TABLE_FORMAT: JSON # JSON or CSV
PARSE_JSON: TRUE TABLE:
JSON: fitbit_sleep
CSV:
SUMMARY: sleep_summary.csv
INTRADAY: sleep_intraday.csv
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: TRUE COMPUTE: False
SLEEP_TYPES: ["main", "nap", "all"] SLEEP_TYPES: ["main", "nap", "all"]
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"] SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
### Visualizations ################################################################ FITBIT_CALORIES:
TABLE_FORMAT: JSON # JSON or CSV
TABLE:
JSON: fitbit_calories
CSV:
SUMMARY: calories_summary.csv
INTRADAY: calories_intraday.csv
PROVIDERS:
RAPIDS:
COMPUTE: False
FEATURES: []
### Visualizations #############################################################
################################################################################
HEATMAP_FEATURES_CORRELATIONS: HEATMAP_FEATURES_CORRELATIONS:
PLOT: False PLOT: False
MIN_ROWS_RATIO: 0.5 MIN_ROWS_RATIO: 0.5

View File

@ -44,9 +44,10 @@ rule download_fitbit_data:
params: params:
source = config["SENSOR_DATA"]["FITBIT"]["SOURCE"], source = config["SENSOR_DATA"]["FITBIT"]["SOURCE"],
sensor = "fitbit_" + "{sensor}", sensor = "fitbit_" + "{sensor}",
type = "{fitbit_data_type}",
table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"], table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
output: output:
"data/raw/{pid}/fitbit_{sensor}_raw.csv" "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_raw.csv"
script: script:
"../src/data/download_fitbit_data.R" "../src/data/download_fitbit_data.R"
@ -179,37 +180,63 @@ rule phone_application_categories:
script: script:
"../src/data/application_categories.R" "../src/data/application_categories.R"
# rule fitbit_heartrate_with_datetime: rule fitbit_parse_heartrate:
# input: input:
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["TABLE"]) data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
# params: params:
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], table = config["FITBIT_HEARTRATE"]["TABLE"],
# fitbit_sensor = "heartrate" table_format = config["FITBIT_HEARTRATE"]["TABLE_FORMAT"]
# output: output:
# summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv", summary_data = "data/raw/{pid}/fitbit_heartrate_summary_parsed.csv",
# intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv" intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv"
# script: script:
# "../src/data/fitbit_readable_datetime.py" "../src/data/fitbit_parse_heartrate.py"
# rule fitbit_step_with_datetime: rule fitbit_parse_steps:
# input: input:
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["TABLE"]) data = expand("data/raw/{{pid}}/fitbit_steps_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
# params: params:
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], table = config["FITBIT_STEPS"]["TABLE"],
# fitbit_sensor = "steps" table_format = config["FITBIT_STEPS"]["TABLE_FORMAT"]
# output: output:
# intraday_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv" summary_data = "data/raw/{pid}/fitbit_steps_summary_parsed.csv",
# script: intraday_data = "data/raw/{pid}/fitbit_steps_intraday_parsed.csv"
# "../src/data/fitbit_readable_datetime.py" script:
"../src/data/fitbit_parse_steps.py"
# rule fitbit_sleep_with_datetime: rule fitbit_parse_calories:
# input: input:
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["TABLE"]) data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
# params: params:
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], table = config["FITBIT_CALORIES"]["TABLE"],
# fitbit_sensor = "sleep" table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"]
# output: output:
# summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv", summary_data = "data/raw/{pid}/fitbit_calories_summary_parsed.csv",
# intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv" intraday_data = "data/raw/{pid}/fitbit_calories_intraday_parsed.csv"
# script: script:
# "../src/data/fitbit_readable_datetime.py" "../src/data/fitbit_parse_calories.py"
rule fitbit_parse_sleep:
input:
data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
params:
table = config["FITBIT_SLEEP"]["TABLE"],
table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
output:
summary_data = "data/raw/{pid}/fitbit_sleep_summary_parsed_episodes.csv",
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
script:
"../src/data/fitbit_parse_sleep.py"
rule fitbit_readable_datetime:
input:
sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
params:
fixed_timezone = "UTC",
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output:
"data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed_with_datetime.csv"
script:
"../src/data/readable_datetime.R"

View File

@ -5,11 +5,9 @@ from datetime import datetime
CALORIES_INTRADAY_COLUMNS = ("device_id", CALORIES_INTRADAY_COLUMNS = ("device_id",
"level", "mets", "value", "level", "mets", "value",
"local_date_time", "local_date", "local_month", "local_day", "local_date_time", "timestamp")
"local_day_of_week", "local_time", "local_hour", "local_minute",
"local_day_segment")
def parseCaloriesData(calories_data, HOUR2EPOCH): def parseCaloriesData(calories_data):
if calories_data.empty: if calories_data.empty:
return pd.DataFrame(), pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS) return pd.DataFrame(), pd.DataFrame(columns=CALORIES_INTRADAY_COLUMNS)
device_id = calories_data["device_id"].iloc[0] device_id = calories_data["device_id"].iloc[0]
@ -26,10 +24,23 @@ def parseCaloriesData(calories_data, HOUR2EPOCH):
row_intraday = (device_id, row_intraday = (device_id,
data["level"], data["mets"], data["value"], data["level"], data["mets"], data["value"],
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day, d_datetime, 0)
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
return pd.DataFrame(), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS) return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
table_format = snakemake.params["table_format"]
if table_format == "JSON":
json_raw = pd.read_csv(snakemake.input[0])
summary, intraday = parseCaloriesData(json_raw)
elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)

View File

@ -1,10 +1,12 @@
import json import json, sys
import pandas as pd import pandas as pd
from datetime import datetime from datetime import datetime, timezone
from math import trunc
HR_SUMMARY_COLUMNS = ("device_id", HR_SUMMARY_COLUMNS = ("device_id",
"local_date", "local_date_time",
"timestamp",
"heartrate_daily_restinghr", "heartrate_daily_restinghr",
"heartrate_daily_caloriesoutofrange", "heartrate_daily_caloriesoutofrange",
"heartrate_daily_caloriesfatburn", "heartrate_daily_caloriesfatburn",
@ -12,10 +14,10 @@ HR_SUMMARY_COLUMNS = ("device_id",
"heartrate_daily_caloriespeak") "heartrate_daily_caloriespeak")
HR_INTRADAY_COLUMNS = ("device_id", HR_INTRADAY_COLUMNS = ("device_id",
"heartrate", "heartrate_zone", "heartrate",
"local_date_time", "local_date", "local_month", "local_day", "heartrate_zone",
"local_day_of_week", "local_time", "local_hour", "local_minute", "local_date_time",
"local_day_segment") "timestamp")
def parseHeartrateZones(heartrate_data): def parseHeartrateZones(heartrate_data):
# Get the range of heartrate zones: outofrange, fatburn, cardio, peak # Get the range of heartrate zones: outofrange, fatburn, cardio, peak
@ -58,6 +60,7 @@ def parseHeartrateSummaryData(record_summary, device_id, curr_date):
row_summary = (device_id, row_summary = (device_id,
curr_date, curr_date,
0,
d_resting_heartrate, d_resting_heartrate,
d_calories_outofrange, d_calories_outofrange,
d_calories_fatburn, d_calories_fatburn,
@ -68,7 +71,7 @@ def parseHeartrateSummaryData(record_summary, device_id, curr_date):
def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH): def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range):
for data in dataset: for data in dataset:
d_time = datetime.strptime(data["time"], '%H:%M:%S').time() d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
d_datetime = datetime.combine(curr_date, d_time) d_datetime = datetime.combine(curr_date, d_time)
@ -83,15 +86,16 @@ def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date,
row_intraday = (device_id, row_intraday = (device_id,
d_hr, d_hrzone, d_hr, d_hrzone,
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day, d_datetime,
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute, 0)
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
return records_intraday return records_intraday
# def append_timestamp(data):
def parseHeartrateData(heartrate_data, HOUR2EPOCH):
def parseHeartrateData(heartrate_data):
if heartrate_data.empty: if heartrate_data.empty:
return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS) return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)
device_id = heartrate_data["device_id"].iloc[0] device_id = heartrate_data["device_id"].iloc[0]
@ -109,6 +113,21 @@ def parseHeartrateData(heartrate_data, HOUR2EPOCH):
records_summary.append(row_summary) records_summary.append(row_summary)
dataset = record["activities-heart-intraday"]["dataset"] dataset = record["activities-heart-intraday"]["dataset"]
records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range, HOUR2EPOCH) records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range)
return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS) return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
table_format = snakemake.params["table_format"]
if table_format == "JSON":
json_raw = pd.read_csv(snakemake.input[0])
summary, intraday = parseHeartrateData(json_raw)
elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)

View File

@ -1,35 +0,0 @@
import json
import pandas as pd
from datetime import datetime
STEPS_INTRADAY_COLUMNS = ("device_id",
"steps",
"local_date_time", "local_date", "local_month", "local_day",
"local_day_of_week", "local_time", "local_hour", "local_minute",
"local_day_segment")
def parseStepsData(steps_data, HOUR2EPOCH):
if steps_data.empty:
return pd.DataFrame(), pd.DataFrame(columns=STEPS_INTRADAY_COLUMNS)
device_id = steps_data["device_id"].iloc[0]
records_intraday = []
# Parse JSON into individual records
for record in steps_data.fitbit_data:
record = json.loads(record) # Parse text into JSON
curr_date = datetime.strptime(
record["activities-steps"][0]["dateTime"], "%Y-%m-%d")
dataset = record["activities-steps-intraday"]["dataset"]
for data in dataset:
d_time = datetime.strptime(data["time"], '%H:%M:%S').time()
d_datetime = datetime.combine(curr_date, d_time)
row_intraday = (device_id,
data["value"],
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day,
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday)
return pd.DataFrame(), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)

View File

@ -12,14 +12,13 @@ SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
"minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed", "minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
"is_main_sleep", "type", "is_main_sleep", "type",
"local_start_date_time", "local_end_date_time", "local_start_date_time", "local_end_date_time",
"local_start_date", "local_end_date", "start_timestamp", "end_timestamp")
"local_start_day_segment", "local_end_day_segment")
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless") SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
SLEEP_INTRADAY_COLUMNS = ("device_id", SLEEP_INTRADAY_COLUMNS = ("device_id",
# For "classic" type, original_level is one of {"awake", "restless", "asleep"} # For "classic" type, original_level is one of {"awake", "restless", "asleep"}
# For "stages" type, original_level is one of {"wake", "deep", "light", "rem"} # For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
"original_level", "level",
# For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"} # For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
# For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"} # For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
"unified_level", "unified_level",
@ -27,9 +26,8 @@ SLEEP_INTRADAY_COLUMNS = ("device_id",
"is_main_sleep", "is_main_sleep",
# one of {"classic", "stages"} # one of {"classic", "stages"}
"type", "type",
"local_date_time", "local_date", "local_month", "local_day", "local_date_time",
"local_day_of_week", "local_time", "local_hour", "local_minute", "timestamp")
"local_day_segment")
def mergeLongAndShortData(data_summary): def mergeLongAndShortData(data_summary):
longData = pd.DataFrame(columns=['dateTime', 'level', 'seconds']) longData = pd.DataFrame(columns=['dateTime', 'level', 'seconds'])
@ -76,7 +74,7 @@ def classicData1min(data_summary):
# print(dataList) # print(dataList)
return dataList return dataList
# Parse one record for sleep API version 1 # Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH): def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday):
# Summary data # Summary data
sleep_record_type = "classic" sleep_record_type = "classic"
@ -89,7 +87,7 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
d_is_main_sleep, sleep_record_type, d_is_main_sleep, sleep_record_type,
d_start_datetime, d_end_datetime, d_start_datetime, d_end_datetime,
d_start_datetime.date(), d_end_datetime.date(), d_start_datetime.date(), d_end_datetime.date(),
HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour], 0,0,
record["awakeCount"], record["awakeDuration"], record["awakeningsCount"], record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
record["restlessCount"], record["restlessDuration"]) record["restlessCount"], record["restlessDuration"])
@ -111,23 +109,17 @@ def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, rec
# (1: "asleep", 2: "restless", 3: "awake") # (1: "asleep", 2: "restless", 3: "awake")
d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1] d_original_level = SLEEP_CODE2LEVEL[int(data["value"])-1]
# unified_level summarises original_level (we came up with this classification)
# 0 is awake, 1 is asleep
# {"awake" + "restless"} are set to 0 and {"asleep"} is set to 1
d_unified_level = 0 if d_original_level == "awake" or d_original_level == "restless" else 1
row_intraday = (device_id, row_intraday = (device_id,
d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type, d_original_level, -1, d_is_main_sleep, sleep_record_type,
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day, d_datetime, 0)
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
return records_summary, records_intraday return records_summary, records_intraday
# Parse one record for sleep API version 1.2 # Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH): def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday):
# Summary data # Summary data
sleep_record_type = record['type'] sleep_record_type = record['type']
@ -139,8 +131,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"], record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
d_is_main_sleep, sleep_record_type, d_is_main_sleep, sleep_record_type,
d_start_datetime, d_end_datetime, d_start_datetime, d_end_datetime,
d_start_datetime.date(), d_end_datetime.date(), 0,0)
HOUR2EPOCH[d_start_datetime.hour], HOUR2EPOCH[d_end_datetime.hour])
records_summary.append(row_summary) records_summary.append(row_summary)
if sleep_record_type == 'classic': if sleep_record_type == 'classic':
@ -160,13 +151,9 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
d_original_level = data["level"] d_original_level = data["level"]
d_unified_level = 0 if d_original_level == "awake" or d_original_level == "restless" else 1
row_intraday = (device_id, row_intraday = (device_id,
d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type, d_original_level, -1, d_is_main_sleep, sleep_record_type,
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day, d_datetime, 0)
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
else: else:
## for sleep type "stages" ## for sleep type "stages"
@ -185,13 +172,9 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
d_original_level = data[1] d_original_level = data[1]
d_unified_level = 1 if d_original_level == "deep" or d_original_level == "light" or d_original_level == "rem" else 0
row_intraday = (device_id, row_intraday = (device_id,
d_original_level, d_unified_level, d_is_main_sleep, sleep_record_type, d_original_level, -1, d_is_main_sleep, sleep_record_type,
d_datetime, d_datetime.date(), d_datetime.month, d_datetime.day, d_datetime, 0)
d_datetime.weekday(), d_datetime.time(), d_datetime.hour, d_datetime.minute,
HOUR2EPOCH[d_datetime.hour])
records_intraday.append(row_intraday) records_intraday.append(row_intraday)
@ -199,7 +182,7 @@ def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, re
def parseSleepData(sleep_data, HOUR2EPOCH): def parseSleepData(sleep_data):
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
if sleep_data.empty: if sleep_data.empty:
return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS) return pd.DataFrame(columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
@ -214,10 +197,29 @@ def parseSleepData(sleep_data, HOUR2EPOCH):
# For sleep API version 1 # For sleep API version 1
if "awakeCount" in record: if "awakeCount" in record:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1
records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH) records_summary, records_intraday = parseOneRecordForV1(record, device_id, d_is_main_sleep, records_summary, records_intraday)
# For sleep API version 1.2 # For sleep API version 1.2
else: else:
SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2 SLEEP_SUMMARY_COLUMNS = SLEEP_SUMMARY_COLUMNS_V1_2
records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday, HOUR2EPOCH) records_summary, records_intraday = parseOneRecordForV12(record, device_id, d_is_main_sleep, records_summary, records_intraday)
return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS) return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
# Entry point: parse raw Fitbit sleep data (JSON dump or pre-split CSVs)
# into a summary table and an intraday table with epoch timestamps (ms).
table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseSleepData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

# pd.read_csv leaves the *_date_time columns as strings (object dtype); the
# epoch arithmetic below raises TypeError on strings, so parse them first.
# pd.to_datetime is a no-op for columns that are already datetime64 (JSON branch).
summary["local_start_date_time"] = pd.to_datetime(summary["local_start_date_time"])
summary["local_end_date_time"] = pd.to_datetime(summary["local_end_date_time"])
intraday["local_date_time"] = pd.to_datetime(intraday["local_date_time"])

# Milliseconds since the Unix epoch, truncated to whole seconds.
summary["start_timestamp"] = (summary["local_start_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
summary["end_timestamp"] = (summary["local_end_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

# Unifying level: awake-like levels map to 0, every asleep level to 1.
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)

View File

@ -0,0 +1,49 @@
import json
import pandas as pd
from datetime import datetime, timezone
from math import trunc
# Output schema for the per-sample (intraday) steps table.
STEPS_INTRADAY_COLUMNS = ("device_id",
                          "steps",
                          "local_date_time",
                          "timestamp")

def parseStepsData(steps_data):
    """Flatten Fitbit 'activities-steps' JSON rows into per-sample step records.

    steps_data is a DataFrame with a "device_id" column and a "fitbit_data"
    column holding one JSON document (as text) per row. Returns a
    (summary, intraday) pair of DataFrames; the summary frame is left empty
    here and only the intraday frame carries parsed rows.
    """
    if steps_data.empty:
        return pd.DataFrame(), pd.DataFrame(columns=STEPS_INTRADAY_COLUMNS)

    device = steps_data["device_id"].iloc[0]
    rows = []
    for raw_document in steps_data.fitbit_data:
        document = json.loads(raw_document)  # each cell is a JSON text blob
        # Every document covers one calendar day; that date anchors each sample.
        day = datetime.strptime(document["activities-steps"][0]["dateTime"], "%Y-%m-%d")
        for sample in document["activities-steps-intraday"]["dataset"]:
            sample_time = datetime.strptime(sample["time"], '%H:%M:%S').time()
            # The trailing 0 is a placeholder for the timestamp column.
            rows.append((device, sample["value"], datetime.combine(day, sample_time), 0))

    return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=rows, columns=STEPS_INTRADAY_COLUMNS)
# Entry point: parse raw Fitbit steps data (JSON dump or pre-split CSVs)
# into a summary table and an intraday table with epoch timestamps (ms).
table_format = snakemake.params["table_format"]

if table_format == "JSON":
    json_raw = pd.read_csv(snakemake.input[0])
    summary, intraday = parseStepsData(json_raw)
elif table_format == "CSV":
    summary = pd.read_csv(snakemake.input[0])
    intraday = pd.read_csv(snakemake.input[1])

# pd.read_csv leaves local_date_time as strings (object dtype); the epoch
# arithmetic below raises TypeError on strings, so parse them first.
# pd.to_datetime is a no-op for columns that are already datetime64.
summary["local_date_time"] = pd.to_datetime(summary["local_date_time"])
intraday["local_date_time"] = pd.to_datetime(intraday["local_date_time"])

# Milliseconds since the Unix epoch, truncated to whole seconds.
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000

summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)