Split FITBIT_HEARTRATE into FITBIT_HEARTRATE_SUMMARY and FITBIT_HEARTRATE_INTRADAY
parent
a94866e83d
commit
9fc36f67e2
26
Snakefile
26
Snakefile
|
@ -144,9 +144,6 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
|
||||||
raise ValueError("config['FITBIT_HEARTRATE']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_HEARTRATE"]["TABLE_FORMAT"])
|
|
||||||
|
|
||||||
if config["FITBIT_STEPS"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
if config["FITBIT_STEPS"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
||||||
raise ValueError("config['FITBIT_STEPS']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_STEPS"]["TABLE_FORMAT"])
|
raise ValueError("config['FITBIT_STEPS']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_STEPS"]["TABLE_FORMAT"])
|
||||||
|
|
||||||
|
@ -156,13 +153,22 @@ if config["FITBIT_CALORIES"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
||||||
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
if config["FITBIT_SLEEP"]["TABLE_FORMAT"] not in ["JSON", "CSV"]:
|
||||||
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
|
raise ValueError("config['FITBIT_SLEEP']['TABLE_FORMAT'] should be JSON or CSV but you typed" + config["FITBIT_SLEEP"]["TABLE_FORMAT"])
|
||||||
|
|
||||||
for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
|
|
||||||
if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
|
for provider in config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
|
if config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_parsed.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_features/fitbit_heartrate_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_summary.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
|
for provider in config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"].keys():
|
||||||
|
if config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_heartrate_intraday.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
|
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
|
||||||
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
|
25
config.yaml
25
config.yaml
|
@ -44,6 +44,7 @@ DEVICE_DATA:
|
||||||
FITBIT:
|
FITBIT:
|
||||||
SOURCE:
|
SOURCE:
|
||||||
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
|
TYPE: DATABASE # DATABASE or FILES (set each FITBIT_SENSOR TABLE attribute accordingly with a table name or a file path)
|
||||||
|
COLUMN_FORMAT: JSON # JSON or PLAIN_TEXT
|
||||||
DATABASE_GROUP: *database_group
|
DATABASE_GROUP: *database_group
|
||||||
DEVICE_ID_COLUMN: device_id # column name
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
TIMEZONE:
|
TIMEZONE:
|
||||||
|
@ -258,20 +259,22 @@ PHONE_CONVERSATION:
|
||||||
############## FITBIT ##########################################################
|
############## FITBIT ##########################################################
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
FITBIT_HEARTRATE:
|
FITBIT_HEARTRATE_SUMMARY:
|
||||||
TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
|
TABLE: heartrate_summary
|
||||||
TABLE:
|
|
||||||
JSON: fitbit_heartrate
|
|
||||||
CSV:
|
|
||||||
SUMMARY: heartrate_summary
|
|
||||||
INTRADAY: heartrate_intraday
|
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
FEATURES:
|
FEATURES: ["maxrestinghr", "minrestinghr", "avgrestinghr", "medianrestinghr", "moderestinghr", "stdrestinghr", "diffmaxmoderestinghr", "diffminmoderestinghr", "entropyrestinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. height, weight) use these with care: ["sumcaloriesoutofrange", "maxcaloriesoutofrange", "mincaloriesoutofrange", "avgcaloriesoutofrange", "mediancaloriesoutofrange", "stdcaloriesoutofrange", "entropycaloriesoutofrange", "sumcaloriesfatburn", "maxcaloriesfatburn", "mincaloriesfatburn", "avgcaloriesfatburn", "mediancaloriesfatburn", "stdcaloriesfatburn", "entropycaloriesfatburn", "sumcaloriescardio", "maxcaloriescardio", "mincaloriescardio", "avgcaloriescardio", "mediancaloriescardio", "stdcaloriescardio", "entropycaloriescardio", "sumcaloriespeak", "maxcaloriespeak", "mincaloriespeak", "avgcaloriespeak", "mediancaloriespeak", "stdcaloriespeak", "entropycaloriespeak"]
|
||||||
SUMMARY: ["maxrestinghr", "minrestinghr", "avgrestinghr", "medianrestinghr", "moderestinghr", "stdrestinghr", "diffmaxmoderestinghr", "diffminmoderestinghr", "entropyrestinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. height, weight) use these with care: ["sumcaloriesoutofrange", "maxcaloriesoutofrange", "mincaloriesoutofrange", "avgcaloriesoutofrange", "mediancaloriesoutofrange", "stdcaloriesoutofrange", "entropycaloriesoutofrange", "sumcaloriesfatburn", "maxcaloriesfatburn", "mincaloriesfatburn", "avgcaloriesfatburn", "mediancaloriesfatburn", "stdcaloriesfatburn", "entropycaloriesfatburn", "sumcaloriescardio", "maxcaloriescardio", "mincaloriescardio", "avgcaloriescardio", "mediancaloriescardio", "stdcaloriescardio", "entropycaloriescardio", "sumcaloriespeak", "maxcaloriespeak", "mincaloriespeak", "avgcaloriespeak", "mediancaloriespeak", "stdcaloriespeak", "entropycaloriespeak"]
|
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_summary
|
||||||
INTRADAY: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
SRC_LANGUAGE: "python"
|
||||||
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate
|
|
||||||
|
FITBIT_HEARTRATE_INTRADAY:
|
||||||
|
TABLE: heartrate_intraday
|
||||||
|
PROVIDERS:
|
||||||
|
RAPIDS:
|
||||||
|
COMPUTE: False
|
||||||
|
FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
||||||
|
SRC_FOLDER: "rapids" # inside src/features/fitbit_heartrate_intraday
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
FITBIT_STEPS:
|
FITBIT_STEPS:
|
||||||
|
|
|
@ -372,29 +372,55 @@ rule phone_wifi_visible_r_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
rule fitbit_heartrate_python_features:
|
rule fitbit_heartrate_summary_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", fitbit_data_type=["summary", "intraday"]),
|
sensor_data = "data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv",
|
||||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["FITBIT_HEARTRATE"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "fitbit_heartrate"
|
sensor_key = "fitbit_heartrate_summary"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/fitbit_heartrate_features/fitbit_heartrate_python_{provider_key}.csv"
|
"data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_python_{provider_key}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
rule fitbit_heartrate_r_features:
|
rule fitbit_heartrate_summary_r_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_parsed_with_datetime.csv", fitbit_data_type=["summary", "intraday"]),
|
sensor_data = "data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv",
|
||||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
provider = lambda wildcards: config["FITBIT_HEARTRATE"]["PROVIDERS"][wildcards.provider_key.upper()],
|
provider = lambda wildcards: config["FITBIT_HEARTRATE_SUMMARY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
provider_key = "{provider_key}",
|
provider_key = "{provider_key}",
|
||||||
sensor_key = "fitbit_heartrate"
|
sensor_key = "fitbit_heartrate_summary"
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/fitbit_heartrate_features/fitbit_heartrate_r_{provider_key}.csv"
|
"data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_r_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.R"
|
||||||
|
|
||||||
|
rule fitbit_heartrate_intraday_python_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
|
||||||
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "fitbit_heartrate_intraday"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_python_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
rule fitbit_heartrate_intraday_r_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
|
||||||
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["FITBIT_HEARTRATE_INTRADAY"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "fitbit_heartrate_intraday"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_r_{provider_key}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
|
@ -424,19 +450,6 @@ rule fitbit_steps_r_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
# rule fitbit_heartrate_features:
|
|
||||||
# input:
|
|
||||||
# heartrate_summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
|
||||||
# heartrate_intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
|
||||||
# params:
|
|
||||||
# day_segment = "{day_segment}",
|
|
||||||
# summary_features = config["HEARTRATE"]["SUMMARY_FEATURES"],
|
|
||||||
# intraday_features = config["HEARTRATE"]["INTRADAY_FEATURES"]
|
|
||||||
# output:
|
|
||||||
# "data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
|
||||||
# script:
|
|
||||||
# "../src/features/fitbit_heartrate_features.py"
|
|
||||||
|
|
||||||
# rule fitbit_step_features:
|
# rule fitbit_step_features:
|
||||||
# input:
|
# input:
|
||||||
# step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv",
|
# step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv",
|
||||||
|
|
|
@ -40,14 +40,13 @@ rule download_phone_data:
|
||||||
rule download_fitbit_data:
|
rule download_fitbit_data:
|
||||||
input:
|
input:
|
||||||
participant_file = "data/external/participant_files/{pid}.yaml",
|
participant_file = "data/external/participant_files/{pid}.yaml",
|
||||||
input_file = [] if config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"]["CSV"][str(wildcards.fitbit_data_type).upper()]
|
input_file = [] if config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["TYPE"] == "DATABASE" else lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"]
|
||||||
params:
|
params:
|
||||||
source = config["DEVICE_DATA"]["FITBIT"]["SOURCE"],
|
source = config["DEVICE_DATA"]["FITBIT"]["SOURCE"],
|
||||||
sensor = "fitbit_" + "{sensor}",
|
sensor = "fitbit_" + "{sensor}",
|
||||||
fitbit_data_type = "{fitbit_data_type}",
|
|
||||||
table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
|
table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
|
||||||
output:
|
output:
|
||||||
"data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_raw.csv"
|
"data/raw/{pid}/fitbit_{sensor}_raw.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/download_fitbit_data.R"
|
"../src/data/download_fitbit_data.R"
|
||||||
|
|
||||||
|
@ -183,14 +182,14 @@ rule phone_application_categories:
|
||||||
|
|
||||||
rule fitbit_parse_heartrate:
|
rule fitbit_parse_heartrate:
|
||||||
input:
|
input:
|
||||||
data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
|
"data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_raw.csv"
|
||||||
params:
|
params:
|
||||||
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
table = config["FITBIT_HEARTRATE"]["TABLE"],
|
table = config["FITBIT_HEARTRATE"]["TABLE"],
|
||||||
table_format = config["FITBIT_HEARTRATE"]["TABLE_FORMAT"]
|
column_format = config["DEVICE_DATA"]["FITBIT"]["SOURCE"]["COLUMN_FORMAT"],
|
||||||
|
fitbit_data_type = "{fitbit_data_type}"
|
||||||
output:
|
output:
|
||||||
summary_data = "data/raw/{pid}/fitbit_heartrate_summary_parsed.csv",
|
"data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_parsed.csv"
|
||||||
intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv"
|
|
||||||
script:
|
script:
|
||||||
"../src/data/fitbit_parse_heartrate.py"
|
"../src/data/fitbit_parse_heartrate.py"
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@ def parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date,
|
||||||
# def append_timestamp(data):
|
# def append_timestamp(data):
|
||||||
|
|
||||||
|
|
||||||
def parseHeartrateData(heartrate_data):
|
def parseHeartrateData(heartrate_data, fitbit_data_type):
|
||||||
if heartrate_data.empty:
|
if heartrate_data.empty:
|
||||||
return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)
|
return pd.DataFrame(columns=HR_SUMMARY_COLUMNS), pd.DataFrame(columns=HR_INTRADAY_COLUMNS)
|
||||||
device_id = heartrate_data["device_id"].iloc[0]
|
device_id = heartrate_data["device_id"].iloc[0]
|
||||||
|
@ -109,29 +109,36 @@ def parseHeartrateData(heartrate_data):
|
||||||
record = json.loads(record) # Parse text into JSON
|
record = json.loads(record) # Parse text into JSON
|
||||||
curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")
|
curr_date = datetime.strptime(record["activities-heart"][0]["dateTime"], "%Y-%m-%d")
|
||||||
|
|
||||||
record_summary = record["activities-heart"][0]
|
if fitbit_data_type == "summary":
|
||||||
row_summary = parseHeartrateSummaryData(record_summary, device_id, curr_date)
|
record_summary = record["activities-heart"][0]
|
||||||
records_summary.append(row_summary)
|
row_summary = parseHeartrateSummaryData(record_summary, device_id, curr_date)
|
||||||
|
records_summary.append(row_summary)
|
||||||
|
|
||||||
dataset = record["activities-heart-intraday"]["dataset"]
|
if fitbit_data_type == "intraday":
|
||||||
records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range)
|
dataset = record["activities-heart-intraday"]["dataset"]
|
||||||
|
records_intraday = parseHeartrateIntradayData(records_intraday, dataset, device_id, curr_date, heartrate_zones_range)
|
||||||
|
|
||||||
|
if fitbit_data_type == "summary":
|
||||||
|
parsed_data = pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS)
|
||||||
|
elif fitbit_data_type == "intraday":
|
||||||
|
parsed_data = pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
|
||||||
|
else:
|
||||||
|
raise ValueError("fitbit_data_type can only be one of ['summary', 'intraday'].")
|
||||||
|
return parsed_data
|
||||||
|
|
||||||
|
|
||||||
return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
|
|
||||||
|
|
||||||
table_format = snakemake.params["table_format"]
|
|
||||||
timezone = snakemake.params["timezone"]
|
timezone = snakemake.params["timezone"]
|
||||||
|
column_format = snakemake.params["column_format"]
|
||||||
|
fitbit_data_type = snakemake.params["fitbit_data_type"]
|
||||||
|
|
||||||
if table_format == "JSON":
|
if column_format == "JSON":
|
||||||
json_raw = pd.read_csv(snakemake.input[0])
|
json_raw = pd.read_csv(snakemake.input[0])
|
||||||
summary, intraday = parseHeartrateData(json_raw)
|
parsed_data = parseHeartrateData(json_raw, fitbit_data_type)
|
||||||
elif table_format == "CSV":
|
elif column_format == "PLAIN_TEXT":
|
||||||
summary = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
parsed_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
||||||
intraday = pd.read_csv(snakemake.input[1], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
|
|
||||||
|
|
||||||
if summary.shape[0] > 0:
|
if parsed_data.shape[0] > 0:
|
||||||
summary["timestamp"] = summary["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
parsed_data["timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
||||||
if intraday.shape[0] > 0:
|
|
||||||
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
|
|
||||||
|
|
||||||
summary.to_csv(snakemake.output["summary_data"], index=False)
|
parsed_data.to_csv(snakemake.output[0], index=False)
|
||||||
intraday.to_csv(snakemake.output["intraday_data"], index=False)
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
import pandas as pd
|
||||||
|
from scipy.stats import entropy
|
||||||
|
|
||||||
|
def _first_mode(values):
    """Return the first mode of *values*, or NaN when there is none."""
    modes = values.mode()
    # Series.mode() ignores NaN, so an all-NaN group produces an empty result;
    # indexing it with [0] would raise instead of reporting a missing value.
    return modes.iloc[0] if not modes.empty else float("nan")


def statsFeatures(heartrate_data, features, features_type, heartrate_features):
    """Add per-segment descriptive statistics of one heart rate column.

    For every statistic ``<stat>`` in sum, max, min, avg, median, mode, std,
    diffmaxmode, diffminmode and entropy, the column
    ``"heartrate_rapids_<stat><features_type>"`` is added to
    ``heartrate_features`` when ``"<stat><features_type>"`` is in ``features``.
    Values are computed per ``local_segment`` group of ``heartrate_data``.

    Args:
        heartrate_data: DataFrame with a ``local_segment`` column and the
            source column selected by ``features_type``.
        features: collection of requested feature names.
        features_type: one of 'hr', 'restinghr', 'caloriesoutofrange',
            'caloriesfatburn', 'caloriescardio', 'caloriespeak'.
        heartrate_features: DataFrame that receives the new columns; it is
            modified in place.

    Returns:
        The same ``heartrate_features`` object, with the requested columns
        added.

    Raises:
        ValueError: if ``features_type`` is not one of the supported values.
    """
    column_by_type = {
        "hr": "heartrate",
        "restinghr": "heartrate_daily_restinghr",
        "caloriesoutofrange": "heartrate_daily_caloriesoutofrange",
        "caloriesfatburn": "heartrate_daily_caloriesfatburn",
        "caloriescardio": "heartrate_daily_caloriescardio",
        "caloriespeak": "heartrate_daily_caloriespeak",
    }
    if features_type not in column_by_type:
        raise ValueError("features_type can only be one of ['hr', 'restinghr', 'caloriesoutofrange', 'caloriesfatburn', 'caloriescardio', 'caloriespeak'].")
    col_name = column_by_type[features_type]

    # Output columns are always added in this order.
    stat_names = ("sum", "max", "min", "avg", "median", "mode", "std", "diffmaxmode", "diffminmode", "entropy")
    requested = [name for name in stat_names if name + features_type in features]
    if not requested:
        # Nothing to compute: do not touch the data (its column may be absent).
        return heartrate_features

    # Group once and reuse the grouping for every requested statistic.
    grouped = heartrate_data[["local_segment", col_name]].groupby(["local_segment"])[col_name]

    # "mode", "diffmaxmode" and "diffminmode" all need the per-segment mode.
    needs_mode = any("mode" in name for name in requested)
    segment_mode = grouped.agg(_first_mode) if needs_mode else None

    aggregations = {
        "sum": grouped.sum,
        "max": grouped.max,
        "min": grouped.min,
        "avg": grouped.mean,
        "median": grouped.median,
        "mode": lambda: segment_mode,
        "std": grouped.std,
        "diffmaxmode": lambda: grouped.max() - segment_mode,
        "diffminmode": lambda: segment_mode - grouped.min(),
        "entropy": lambda: grouped.agg(entropy),
    }
    for name in requested:
        heartrate_features["heartrate_rapids_" + name + features_type] = aggregations[name]()

    return heartrate_features
|
||||||
|
|
||||||
|
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, day_segment, filter_data_by_segment):
    """Compute intraday heart rate features for every segment of a day segment.

    Args:
        heartrate_intraday_data: DataFrame of intraday heart rate rows. The
            code reads the columns ``local_date``, ``local_hour``,
            ``local_minute``, ``device_id`` and ``heartrate_zone``, plus
            ``local_segment`` after filtering.
        features: names of the features to compute (``*hr`` statistics and/or
            ``minuteson<zone>zone`` features).
        day_segment: passed unchanged to ``filter_data_by_segment``.
        filter_data_by_segment: callable ``(data, day_segment) -> DataFrame``
            supplied by the caller; its result is used as the data to
            summarise.

    Returns:
        A DataFrame with a ``local_segment`` column and one
        ``heartrate_rapids_<feature>`` column per computed feature. When there
        is no data (before or after filtering) the frame has those columns but
        no rows.
    """
    no_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in features])
    if heartrate_intraday_data.empty:
        return no_features

    # Sampling rate is measured on the unfiltered data, before segment filtering.
    num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"]
    heartrate_intraday_data = filter_data_by_segment(heartrate_intraday_data, day_segment)
    if heartrate_intraday_data.empty:
        return no_features

    # Seed the result with every segment present in the data. Otherwise, when
    # no "*hr" statistic is requested, the index would be taken from the first
    # zone column and segments without rows in that zone would be lost instead
    # of being reported as 0 minutes.
    segments = pd.Index(sorted(heartrate_intraday_data["local_segment"].dropna().unique()), name="local_segment")
    heartrate_intraday_features = pd.DataFrame(index=segments)

    # get stats of heartrate
    heartrate_intraday_features = statsFeatures(heartrate_intraday_data, features, "hr", heartrate_intraday_features)

    # get number of minutes in each heart rate zone
    # (fixed iteration order keeps the output column order stable between runs)
    zone_by_feature = {
        "minutesonoutofrangezone": "outofrange",
        "minutesonfatburnzone": "fatburn",
        "minutesoncardiozone": "cardio",
        "minutesonpeakzone": "peak",
    }
    for feature_name, zone in zone_by_feature.items():
        if feature_name not in features:
            continue
        column = "heartrate_rapids_" + feature_name
        rows_in_zone = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == zone]
        heartrate_intraday_features[column] = rows_in_zone.groupby(["local_segment"])["device_id"].count() / num_rows_per_minute
        # Segments with no rows in this zone spent 0 minutes in it.
        heartrate_intraday_features[column] = heartrate_intraday_features[column].fillna(0)

    heartrate_intraday_features.reset_index(inplace=True)
    return heartrate_intraday_features
|
||||||
|
|
||||||
|
|
||||||
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS intraday heart rate features for one day segment.

    Args:
        sensor_data_files: mapping whose ``"sensor_data"`` entry is the path of
            the intraday heart rate CSV to read.
        day_segment: forwarded to ``extractHRFeaturesFromIntradayData``.
        provider: provider configuration; ``provider["FEATURES"]`` lists the
            requested feature names.
        filter_data_by_segment: forwarded to
            ``extractHRFeaturesFromIntradayData``.
        *args, **kwargs: accepted and ignored.

    Returns:
        The DataFrame produced by ``extractHRFeaturesFromIntradayData``.
    """
    heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])

    requested_intraday_features = set(provider["FEATURES"])
    # name of the features this function can compute
    base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
    # the subset of requested features this function can compute, kept in the
    # order of the supported list so the result does not depend on set
    # iteration order (which can differ between runs)
    intraday_features_to_compute = [name for name in base_intraday_features_names if name in requested_intraday_features]

    # extract features from intraday data
    return extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, day_segment, filter_data_by_segment)
|
|
@ -58,41 +58,16 @@ def extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features):
|
||||||
|
|
||||||
return heartrate_summary_features
|
return heartrate_summary_features
|
||||||
|
|
||||||
def extractHRFeaturesFromIntradayData(heartrate_intraday_data, features, day_segment, filter_data_by_segment):
|
|
||||||
heartrate_intraday_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in features])
|
|
||||||
if not heartrate_intraday_data.empty:
|
|
||||||
num_rows_per_minute = heartrate_intraday_data.groupby(["local_date", "local_hour", "local_minute"]).count().mean()["device_id"]
|
|
||||||
heartrate_intraday_data = filter_data_by_segment(heartrate_intraday_data, day_segment)
|
|
||||||
|
|
||||||
if not heartrate_intraday_data.empty:
|
|
||||||
heartrate_intraday_features = pd.DataFrame()
|
|
||||||
|
|
||||||
# get stats of heartrate
|
|
||||||
heartrate_intraday_features = statsFeatures(heartrate_intraday_data, features, "hr", heartrate_intraday_features)
|
|
||||||
|
|
||||||
# get number of minutes in each heart rate zone
|
|
||||||
for feature_name in list(set(["minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]) & set(features)):
|
|
||||||
heartrate_zone = heartrate_intraday_data[heartrate_intraday_data["heartrate_zone"] == feature_name[9:-4]]
|
|
||||||
heartrate_intraday_features["heartrate_rapids_" + feature_name] = heartrate_zone.groupby(["local_segment"])["device_id"].count() / num_rows_per_minute
|
|
||||||
heartrate_intraday_features.fillna(value={"heartrate_rapids_" + feature_name: 0}, inplace=True)
|
|
||||||
heartrate_intraday_features.reset_index(inplace=True)
|
|
||||||
|
|
||||||
return heartrate_intraday_features
|
|
||||||
|
|
||||||
|
|
||||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||||
|
|
||||||
heartrate_summary_data = pd.read_csv(sensor_data_files["sensor_data"][0])
|
heartrate_summary_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||||
heartrate_intraday_data = pd.read_csv(sensor_data_files["sensor_data"][1])
|
|
||||||
|
|
||||||
requested_summary_features = provider["FEATURES"]["SUMMARY"]
|
requested_summary_features = provider["FEATURES"]
|
||||||
requested_intraday_features = provider["FEATURES"]["INTRADAY"]
|
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
base_summary_features_names = ["maxrestinghr", "minrestinghr", "avgrestinghr", "medianrestinghr", "moderestinghr", "stdrestinghr", "diffmaxmoderestinghr", "diffminmoderestinghr", "entropyrestinghr", "sumcaloriesoutofrange", "maxcaloriesoutofrange", "mincaloriesoutofrange", "avgcaloriesoutofrange", "mediancaloriesoutofrange", "stdcaloriesoutofrange", "entropycaloriesoutofrange", "sumcaloriesfatburn", "maxcaloriesfatburn", "mincaloriesfatburn", "avgcaloriesfatburn", "mediancaloriesfatburn", "stdcaloriesfatburn", "entropycaloriesfatburn", "sumcaloriescardio", "maxcaloriescardio", "mincaloriescardio", "avgcaloriescardio", "mediancaloriescardio", "stdcaloriescardio", "entropycaloriescardio", "sumcaloriespeak", "maxcaloriespeak", "mincaloriespeak", "avgcaloriespeak", "mediancaloriespeak", "stdcaloriespeak", "entropycaloriespeak"]
|
base_summary_features_names = ["maxrestinghr", "minrestinghr", "avgrestinghr", "medianrestinghr", "moderestinghr", "stdrestinghr", "diffmaxmoderestinghr", "diffminmoderestinghr", "entropyrestinghr", "sumcaloriesoutofrange", "maxcaloriesoutofrange", "mincaloriesoutofrange", "avgcaloriesoutofrange", "mediancaloriesoutofrange", "stdcaloriesoutofrange", "entropycaloriesoutofrange", "sumcaloriesfatburn", "maxcaloriesfatburn", "mincaloriesfatburn", "avgcaloriesfatburn", "mediancaloriesfatburn", "stdcaloriesfatburn", "entropycaloriesfatburn", "sumcaloriescardio", "maxcaloriescardio", "mincaloriescardio", "avgcaloriescardio", "mediancaloriescardio", "stdcaloriescardio", "entropycaloriescardio", "sumcaloriespeak", "maxcaloriespeak", "mincaloriespeak", "avgcaloriespeak", "mediancaloriespeak", "stdcaloriespeak", "entropycaloriespeak"]
|
||||||
base_intraday_features_names = ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
|
||||||
# the subset of requested features this function can compute
|
# the subset of requested features this function can compute
|
||||||
summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features_names))
|
summary_features_to_compute = list(set(requested_summary_features) & set(base_summary_features_names))
|
||||||
intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
|
|
||||||
|
|
||||||
# extract features from summary data
|
# extract features from summary data
|
||||||
heartrate_summary_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in summary_features_to_compute])
|
heartrate_summary_features = pd.DataFrame(columns=["local_segment"] + ["heartrate_rapids_" + x for x in summary_features_to_compute])
|
||||||
|
@ -110,10 +85,4 @@ def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_seg
|
||||||
if not heartrate_summary_data.empty:
|
if not heartrate_summary_data.empty:
|
||||||
heartrate_summary_features = extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features_to_compute)
|
heartrate_summary_features = extractHRFeaturesFromSummaryData(heartrate_summary_data, summary_features_to_compute)
|
||||||
|
|
||||||
# extract features from intraday data
|
return heartrate_summary_features
|
||||||
heartrate_intraday_features = extractHRFeaturesFromIntradayData(heartrate_intraday_data, intraday_features_to_compute, day_segment, filter_data_by_segment)
|
|
||||||
|
|
||||||
# merge summary features and intraday features
|
|
||||||
heartrate_features = heartrate_intraday_features.merge(heartrate_summary_features, on=["local_segment"], how="outer")
|
|
||||||
|
|
||||||
return heartrate_features
|
|
Loading…
Reference in New Issue