Add tz support to fitbit

pull/103/head
JulioV 2020-10-22 14:38:40 -04:00
parent cff83a7ceb
commit 47adb8bc4b
6 changed files with 39 additions and 17 deletions

View File

@@ -42,6 +42,9 @@ SENSOR_DATA:
 TYPE: DATABASE # DATABASE or CSV_FILES (set each FITBIT_SENSOR TABLE attribute accordingly)
 DATABASE_GROUP: *database_group
 DEVICE_ID_COLUMN: device_id # column name
+TIMEZONE:
+TYPE: SINGLE # Fitbit only supports SINGLE timezones
+VALUE: *timezone # timezone code (e.g. America/New_York, see attribute TIMEZONE above and https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
 PHONE_VALID_SENSED_BINS:
 COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
@@ -76,7 +79,7 @@ PHONE_CALLS:
 TABLE: calls
 PROVIDERS:
 RAPIDS:
-COMPUTE: True
+COMPUTE: False
 CALL_TYPES: [missed, incoming, outgoing]
 FEATURES:
 missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
@@ -128,7 +131,7 @@ PHONE_ACTIVITY_RECOGNITION:
 IOS: plugin_ios_activity_recognition
 PROVIDERS:
 RAPIDS:
-COMPUTE: True
+COMPUTE: False
 FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
 ACTIVITY_CLASSES:
 STATIONARY: ["still", "tilting"]
@@ -231,7 +234,7 @@ PHONE_CONVERSATION:
 IOS: plugin_studentlife_audio
 PROVIDERS:
 RAPIDS:
-COMPUTE: True
+COMPUTE: False
 FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
 "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
 "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",

View File

@@ -31,7 +31,7 @@ rule download_phone_data:
 source = config["SENSOR_DATA"]["PHONE"]["SOURCE"],
 sensor = "phone_" + "{sensor}",
 table = lambda wildcards: config["PHONE_" + str(wildcards.sensor).upper()]["TABLE"],
-timezone = config["TIMEZONE"],
+timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
 aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["IOS"],
 output:
 "data/raw/{pid}/phone_{sensor}_raw.csv"
@@ -44,7 +44,7 @@ rule download_fitbit_data:
 params:
 source = config["SENSOR_DATA"]["FITBIT"]["SOURCE"],
 sensor = "fitbit_" + "{sensor}",
-type = "{fitbit_data_type}",
+fitbit_data_type = "{fitbit_data_type}",
 table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
 output:
 "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_raw.csv"
@@ -184,6 +184,7 @@ rule fitbit_parse_heartrate:
 input:
 data = expand("data/raw/{{pid}}/fitbit_heartrate_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_HEARTRATE"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
 params:
+timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
 table = config["FITBIT_HEARTRATE"]["TABLE"],
 table_format = config["FITBIT_HEARTRATE"]["TABLE_FORMAT"]
 output:
@@ -196,6 +197,7 @@ rule fitbit_parse_steps:
 input:
 data = expand("data/raw/{{pid}}/fitbit_steps_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_STEPS"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
 params:
+timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
 table = config["FITBIT_STEPS"]["TABLE"],
 table_format = config["FITBIT_STEPS"]["TABLE_FORMAT"]
 output:
@@ -208,6 +210,7 @@ rule fitbit_parse_calories:
 input:
 data = expand("data/raw/{{pid}}/fitbit_calories_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
 params:
+timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
 table = config["FITBIT_CALORIES"]["TABLE"],
 table_format = config["FITBIT_CALORIES"]["TABLE_FORMAT"]
 output:
@@ -220,6 +223,7 @@ rule fitbit_parse_sleep:
 input:
 data = expand("data/raw/{{pid}}/fitbit_sleep_{fitbit_data_type}_raw.csv", fitbit_data_type = (["json"] if config["FITBIT_SLEEP"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"]))
 params:
+timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
 table = config["FITBIT_SLEEP"]["TABLE"],
 table_format = config["FITBIT_SLEEP"]["TABLE_FORMAT"]
 output:
@@ -233,7 +237,7 @@ rule fitbit_readable_datetime:
 sensor_input = "data/raw/{pid}/fitbit_{sensor}_{fitbit_data_type}_parsed.csv",
 day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
 params:
-fixed_timezone = "UTC",
+fixed_timezone = config["SENSOR_DATA"]["FITBIT"]["TIMEZONE"]["VALUE"],
 day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
 include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
 output:
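
In the rules file, download_phone_data now reads its timezone from SENSOR_DATA > PHONE > TIMEZONE > VALUE rather than a top-level TIMEZONE key, each fitbit_parse_* rule gains a timezone param that the parser scripts below pick up, and fitbit_readable_datetime replaces the hard-coded "UTC" with the configured Fitbit timezone. The practical effect of that last change is that epoch timestamps are interpreted in the study timezone when local date-times are derived. The pandas snippet below is only an illustration of that idea under made-up column values, not the project's conversion script.

```python
# Illustration: interpret epoch-millisecond timestamps in a fixed timezone
# to recover local date-times.
import pandas as pd

# The configured value would come from config["SENSOR_DATA"]["FITBIT"]["TIMEZONE"]["VALUE"].
fixed_timezone = "America/New_York"

# One fake epoch-millisecond timestamp standing in for parsed Fitbit data.
data = pd.DataFrame({"timestamp": [1603391920000]})

local = pd.to_datetime(data["timestamp"], unit="ms", utc=True).dt.tz_convert(fixed_timezone)
data["local_date_time"] = local.dt.strftime("%Y-%m-%d %H:%M:%S")
print(data)
```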

View File

@@ -1,4 +1,5 @@
 import json
+import numpy as np
 import pandas as pd
 from datetime import datetime
@@ -28,9 +29,10 @@ def parseCaloriesData(calories_data):
 records_intraday.append(row_intraday)
-return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
+return pd.DataFrame(data=[], columns=["local_date_time", "timestamp"]), pd.DataFrame(data=records_intraday, columns=CALORIES_INTRADAY_COLUMNS)
 table_format = snakemake.params["table_format"]
+timezone = snakemake.params["timezone"]
 if table_format == "JSON":
 json_raw = pd.read_csv(snakemake.input[0])
@ -39,8 +41,10 @@ elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
if summary.shape[0] > 0:
summary["timestamp"] = summary["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0:
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)
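
This parser (and the heart rate, sleep, and steps parsers that follow) swaps the naive epoch arithmetic on local_date_time for a timezone-aware conversion: local date-times are localized to the configured timezone and then turned into epoch milliseconds, and the new shape checks skip the conversion when a frame is empty. The empty summary frame also gains a timestamp column so the summary output keeps a consistent schema. Below is a self-contained sketch of the before/after conversion; the sample value is invented and the variable names simply mirror the scripts.

```python
# Sketch of the conversion change: old vs new epoch-millisecond timestamps.
import numpy as np
import pandas as pd

timezone = "America/New_York"  # snakemake.params["timezone"] in the real scripts
intraday = pd.DataFrame({"local_date_time": pd.to_datetime(["2020-10-22 14:38:40"])})

# Old expression: naive arithmetic that implicitly treats local_date_time as UTC.
old_ts = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s") * 1000

# New expression: localize to the configured timezone, then take epoch milliseconds.
new_ts = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6

print(int(old_ts[0]), int(new_ts[0]))  # the two differ by the UTC offset (4 h at this date)
```

Localizing first matters because the stored date-times are wall-clock local times; treating them as UTC shifted every timestamp by the study timezone's UTC offset.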

View File

@@ -1,5 +1,6 @@
 import json, sys
 import pandas as pd
+import numpy as np
 from datetime import datetime, timezone
 from math import trunc
@@ -118,6 +119,7 @@ def parseHeartrateData(heartrate_data):
 return pd.DataFrame(data=records_summary, columns=HR_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=HR_INTRADAY_COLUMNS)
 table_format = snakemake.params["table_format"]
+timezone = snakemake.params["timezone"]
 if table_format == "JSON":
 json_raw = pd.read_csv(snakemake.input[0])
@ -126,8 +128,10 @@ elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
if summary.shape[0] > 0:
summary["timestamp"] = summary["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0:
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)

View File

@@ -206,6 +206,7 @@ def parseSleepData(sleep_data):
 return pd.DataFrame(data=records_summary, columns=SLEEP_SUMMARY_COLUMNS), pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
 table_format = snakemake.params["table_format"]
+timezone = snakemake.params["timezone"]
 if table_format == "JSON":
 json_raw = pd.read_csv(snakemake.input[0])
@ -214,9 +215,11 @@ elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["start_timestamp"] = (summary["local_start_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
summary["end_timestamp"] = (summary["local_end_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
if summary.shape[0] > 0:
summary["start_timestamp"] = summary["local_start_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
summary["end_timestamp"] = summary["local_end_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0:
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
# Unifying level
intraday["unified_level"] = np.where(intraday["level"].isin(["awake", "wake", "restless"]), 0, 1)

View File

@@ -1,5 +1,6 @@
 import json
 import pandas as pd
+import numpy as np
 from datetime import datetime, timezone
 from math import trunc
@@ -31,9 +32,10 @@ def parseStepsData(steps_data):
 records_intraday.append(row_intraday)
-return pd.DataFrame(data=[], columns=["local_date_time"]), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)
+return pd.DataFrame(data=[], columns=["local_date_time", "timestamp"]), pd.DataFrame(data=records_intraday, columns=STEPS_INTRADAY_COLUMNS)
 table_format = snakemake.params["table_format"]
+timezone = snakemake.params["timezone"]
 if table_format == "JSON":
 json_raw = pd.read_csv(snakemake.input[0])
@ -42,8 +44,10 @@ elif table_format == "CSV":
summary = pd.read_csv(snakemake.input[0])
intraday = pd.read_csv(snakemake.input[1])
summary["timestamp"] = (summary["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
intraday["timestamp"] = (intraday["local_date_time"] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') * 1000
if summary.shape[0] > 0:
summary["timestamp"] = summary["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
if intraday.shape[0] > 0:
intraday["timestamp"] = intraday["local_date_time"].dt.tz_localize(timezone).astype(np.int64) // 10**6
summary.to_csv(snakemake.output["summary_data"], index=False)
intraday.to_csv(snakemake.output["intraday_data"], index=False)