Update participant files structure and fitbit download rule
parent
c835987d52
commit
c266f6dd10
51
Snakefile
51
Snakefile
|
@ -36,9 +36,9 @@ for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
|
||||||
|
|
||||||
for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
|
for provider in config["PHONE_CALLS"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["PHONE_CALLS"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
|
@ -122,23 +122,6 @@ for provider in config["PHONE_WIFI_CONNECTED"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_connected.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_connected.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
if config["HEARTRATE"]["COMPUTE"]:
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]))
|
|
||||||
|
|
||||||
if config["STEP"]["COMPUTE"]:
|
|
||||||
if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["DB_TABLE"]))
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
|
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]))
|
|
||||||
|
|
||||||
if config["SLEEP"]["COMPUTE"]:
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SLEEP"]["DB_TABLE"]))
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
|
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
|
|
||||||
|
|
||||||
for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
|
||||||
|
@ -150,10 +133,10 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
||||||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
|
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
|
||||||
if config["PHONE_LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
|
if "PHONE_LOCATIONS" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||||
else:
|
else:
|
||||||
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||||
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||||
|
@ -161,6 +144,30 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
|
||||||
|
|
||||||
|
|
||||||
|
for provider in config["FITBIT_HEARTRATE"]["PROVIDERS"].keys():
|
||||||
|
if config["FITBIT_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_raw.csv", pid=config["PIDS"]))
|
||||||
|
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||||
|
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
|
for provider in config["FITBIT_STEPS"]["PROVIDERS"].keys():
|
||||||
|
if config["FITBIT_STEPS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_steps_raw.csv", pid=config["PIDS"]))
|
||||||
|
# if config["STEP"]["COMPUTE"]:
|
||||||
|
# if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
|
||||||
|
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary"]))
|
||||||
|
# files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["TABLE"]))
|
||||||
|
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
|
||||||
|
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
|
for provider in config["FITBIT_SLEEP"]["PROVIDERS"].keys():
|
||||||
|
if config["FITBIT_SLEEP"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_raw.csv", pid=config["PIDS"]))
|
||||||
|
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
|
||||||
|
# files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
|
|
||||||
# visualization for data exploration
|
# visualization for data exploration
|
||||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
125
config.yaml
125
config.yaml
|
@ -8,33 +8,46 @@ DAY_SEGMENTS: &day_segments
|
||||||
FILE: "data/external/daysegments_periodic.csv"
|
FILE: "data/external/daysegments_periodic.csv"
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC. If set to TRUE, we consider day segments far enough back in the past to include the first day of data
|
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC. If set to TRUE, we consider day segments far enough back in the past to include the first day of data
|
||||||
|
|
||||||
# Global timezone
|
# Use tz codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones. Double check your code, for example EST is not US Eastern Time.
|
||||||
# Use codes from https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
|
||||||
# Double check your code, for example EST is not US Eastern Time.
|
|
||||||
TIMEZONE: &timezone
|
TIMEZONE: &timezone
|
||||||
America/New_York
|
America/New_York
|
||||||
|
|
||||||
DATABASE_GROUP: &database_group
|
DATABASE_GROUP: &database_group
|
||||||
MY_GROUP
|
MY_GROUP
|
||||||
|
|
||||||
DOWNLOAD_PARTICIPANTS:
|
PARTICIPANT_FILES: # run snakemake -j1 -R parse_participant_files
|
||||||
IGNORED_DEVICE_IDS: [] # for example "5a1dd68c-6cd1-48fe-ae1e-14344ac5215f"
|
PHONE_SECTION:
|
||||||
GROUP: *database_group
|
INCLUDE: TRUE
|
||||||
|
PARSED_FROM: AWARE_DEVICE_TABLE #AWARE_DEVICE_TABLE or CSV_FILE
|
||||||
|
PARSED_SOURCE: *database_group # DB credentials group or CSV file path. If CSV file, it should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
|
||||||
|
IGNORED_DEVICE_IDS: []
|
||||||
|
FITBIT_SECTION:
|
||||||
|
INCLUDE: FALSE
|
||||||
|
SAME_AS_PHONE: FALSE # If TRUE, all config below is ignored
|
||||||
|
PARSED_FROM: CSV_FILE
|
||||||
|
PARSED_SOURCE: "external/my_fitbit_participants.csv" # CSV file should have: device_id, pid (optional), label (optional), start_date (optional), end_date (optional)
|
||||||
|
|
||||||
# Download data config
|
SENSOR_DATA:
|
||||||
DOWNLOAD_DATASET:
|
PHONE:
|
||||||
GROUP: *database_group
|
SOURCE:
|
||||||
|
TYPE: DATABASE
|
||||||
# Readable datetime config
|
DATABASE_GROUP: *database_group
|
||||||
READABLE_DATETIME:
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
FIXED_TIMEZONE: *timezone
|
TIMEZONE:
|
||||||
|
TYPE: SINGLE # SINGLE or MULTIPLE
|
||||||
|
VALUE: *timezone # If TYPE=SINGLE, a timezone code (e.g. America/New_York, see attribute TIMEZONE above). If TYPE=MULTIPLE, a table in your database with two columns (timestamp, timezone), where timestamp is a unix timestamp and timezone is one of the codes listed at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
||||||
|
FITBIT:
|
||||||
|
SOURCE:
|
||||||
|
TYPE: DATABASE # DATABASE or CSV_FILES (set each FITBIT_SENSOR TABLE attribute accordingly)
|
||||||
|
DATABASE_GROUP: *database_group
|
||||||
|
DEVICE_ID_COLUMN: device_id # column name
|
||||||
|
|
||||||
PHONE_VALID_SENSED_BINS:
|
PHONE_VALID_SENSED_BINS:
|
||||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
BIN_SIZE: &bin_size 5 # (in minutes)
|
||||||
# Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
# Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||||
# If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS tables are mandatory.
|
# If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS tables are mandatory.
|
||||||
# You can choose any of the keys shown below, just make sure its DB_TABLE exists in your database!
|
# You can choose any of the keys shown below, just make sure its TABLE exists in your database!
|
||||||
# PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
|
# PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
|
||||||
# PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
|
# PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
|
||||||
PHONE_SENSORS: []
|
PHONE_SENSORS: []
|
||||||
|
@ -46,7 +59,7 @@ PHONE_VALID_SENSED_DAYS:
|
||||||
|
|
||||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||||
PHONE_MESSAGES:
|
PHONE_MESSAGES:
|
||||||
DB_TABLE: messages
|
TABLE: messages
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -59,10 +72,10 @@ PHONE_MESSAGES:
|
||||||
|
|
||||||
# Communication call features config, TYPES and FEATURES keys need to match
|
# Communication call features config, TYPES and FEATURES keys need to match
|
||||||
PHONE_CALLS:
|
PHONE_CALLS:
|
||||||
DB_TABLE: calls
|
TABLE: calls
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
CALL_TYPES: [missed, incoming, outgoing]
|
CALL_TYPES: [missed, incoming, outgoing]
|
||||||
FEATURES:
|
FEATURES:
|
||||||
missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
|
missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
|
||||||
|
@ -72,7 +85,7 @@ PHONE_CALLS:
|
||||||
SRC_FOLDER: "rapids" # inside src/features/phone_calls
|
SRC_FOLDER: "rapids" # inside src/features/phone_calls
|
||||||
|
|
||||||
PHONE_LOCATIONS:
|
PHONE_LOCATIONS:
|
||||||
DB_TABLE: locations
|
TABLE: locations
|
||||||
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED
|
||||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||||
|
@ -99,7 +112,7 @@ PHONE_LOCATIONS:
|
||||||
SRC_LANGUAGE: "r"
|
SRC_LANGUAGE: "r"
|
||||||
|
|
||||||
PHONE_BLUETOOTH:
|
PHONE_BLUETOOTH:
|
||||||
DB_TABLE: bluetooth
|
TABLE: bluetooth
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -109,12 +122,12 @@ PHONE_BLUETOOTH:
|
||||||
|
|
||||||
|
|
||||||
PHONE_ACTIVITY_RECOGNITION:
|
PHONE_ACTIVITY_RECOGNITION:
|
||||||
DB_TABLE:
|
TABLE:
|
||||||
ANDROID: plugin_google_activity_recognition
|
ANDROID: plugin_google_activity_recognition
|
||||||
IOS: plugin_ios_activity_recognition
|
IOS: plugin_ios_activity_recognition
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
|
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
|
||||||
ACTIVITY_CLASSES:
|
ACTIVITY_CLASSES:
|
||||||
STATIONARY: ["still", "tilting"]
|
STATIONARY: ["still", "tilting"]
|
||||||
|
@ -124,7 +137,7 @@ PHONE_ACTIVITY_RECOGNITION:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_BATTERY:
|
PHONE_BATTERY:
|
||||||
DB_TABLE: battery
|
TABLE: battery
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -133,7 +146,7 @@ PHONE_BATTERY:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_SCREEN:
|
PHONE_SCREEN:
|
||||||
DB_TABLE: screen
|
TABLE: screen
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -146,7 +159,7 @@ PHONE_SCREEN:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_LIGHT:
|
PHONE_LIGHT:
|
||||||
DB_TABLE: light
|
TABLE: light
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -155,7 +168,7 @@ PHONE_LIGHT:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_ACCELEROMETER:
|
PHONE_ACCELEROMETER:
|
||||||
DB_TABLE: accelerometer
|
TABLE: accelerometer
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -173,7 +186,7 @@ PHONE_ACCELEROMETER:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_APPLICATIONS_FOREGROUND:
|
PHONE_APPLICATIONS_FOREGROUND:
|
||||||
DB_TABLE: applications_foreground
|
TABLE: applications_foreground
|
||||||
APPLICATION_CATEGORIES:
|
APPLICATION_CATEGORIES:
|
||||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||||
|
@ -194,7 +207,7 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
PHONE_WIFI_VISIBLE:
|
PHONE_WIFI_VISIBLE:
|
||||||
DB_TABLE: "wifi"
|
TABLE: "wifi"
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -203,7 +216,7 @@ PHONE_WIFI_VISIBLE:
|
||||||
SRC_LANGUAGE: "r"
|
SRC_LANGUAGE: "r"
|
||||||
|
|
||||||
PHONE_WIFI_CONNECTED:
|
PHONE_WIFI_CONNECTED:
|
||||||
DB_TABLE: "sensor_wifi"
|
TABLE: "sensor_wifi"
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
@ -212,12 +225,12 @@ PHONE_WIFI_CONNECTED:
|
||||||
SRC_LANGUAGE: "r"
|
SRC_LANGUAGE: "r"
|
||||||
|
|
||||||
PHONE_CONVERSATION:
|
PHONE_CONVERSATION:
|
||||||
DB_TABLE:
|
TABLE:
|
||||||
ANDROID: plugin_studentlife_audio_android
|
ANDROID: plugin_studentlife_audio_android
|
||||||
IOS: plugin_studentlife_audio
|
IOS: plugin_studentlife_audio
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: False
|
COMPUTE: True
|
||||||
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
|
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
|
||||||
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
|
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
|
||||||
"avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
|
"avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
|
||||||
|
@ -229,36 +242,42 @@ PHONE_CONVERSATION:
|
||||||
SRC_LANGUAGE: "python"
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
|
|
||||||
HEARTRATE:
|
FITBIT_HEARTRATE:
|
||||||
COMPUTE: False
|
TABLE: "fitbit_data"
|
||||||
DB_TABLE: fitbit_data
|
PARSE_JSON: TRUE
|
||||||
DAY_SEGMENTS: *day_segments
|
PROVIDERS:
|
||||||
SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. heigh, weight) use with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
|
RAPIDS:
|
||||||
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
COMPUTE: True
|
||||||
|
SUMMARY_FEATURES: ["restinghr"] # The accuracy of calories features depends on the accuracy of the participant's Fitbit profile (e.g. height, weight); use these with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
|
||||||
|
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
|
||||||
|
|
||||||
STEP:
|
FITBIT_STEPS:
|
||||||
COMPUTE: False
|
TABLE: fitbit_data
|
||||||
DB_TABLE: fitbit_data
|
PARSE_JSON: TRUE
|
||||||
DAY_SEGMENTS: *day_segments
|
|
||||||
EXCLUDE_SLEEP:
|
EXCLUDE_SLEEP:
|
||||||
EXCLUDE: False
|
EXCLUDE: False
|
||||||
TYPE: FIXED # FIXED OR FITBIT_BASED (CONFIGURE FITBIT's SLEEP DB_TABLE)
|
TYPE: FIXED # FIXED OR FITBIT_BASED (configure FITBIT_SLEEP section)
|
||||||
FIXED:
|
FIXED:
|
||||||
START: "23:00"
|
START: "23:00"
|
||||||
END: "07:00"
|
END: "07:00"
|
||||||
FEATURES:
|
PROVIDERS:
|
||||||
ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
|
RAPIDS:
|
||||||
SEDENTARY_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
|
COMPUTE: TRUE
|
||||||
ACTIVE_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
|
FEATURES:
|
||||||
THRESHOLD_ACTIVE_BOUT: 10 # steps
|
ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
|
||||||
INCLUDE_ZERO_STEP_ROWS: False
|
SEDENTARY_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
|
||||||
|
ACTIVE_BOUT: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
|
||||||
|
THRESHOLD_ACTIVE_BOUT: 10 # steps
|
||||||
|
INCLUDE_ZERO_STEP_ROWS: False
|
||||||
|
|
||||||
SLEEP:
|
FITBIT_SLEEP:
|
||||||
COMPUTE: False
|
TABLE: fitbit_data
|
||||||
DB_TABLE: fitbit_data
|
PARSE_JSON: TRUE
|
||||||
DAY_SEGMENTS: *day_segments
|
PROVIDERS:
|
||||||
SLEEP_TYPES: ["main", "nap", "all"]
|
RAPIDS:
|
||||||
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
COMPUTE: TRUE
|
||||||
|
SLEEP_TYPES: ["main", "nap", "all"]
|
||||||
|
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
|
||||||
|
|
||||||
### Visualizations ################################################################
|
### Visualizations ################################################################
|
||||||
HEATMAP_FEATURES_CORRELATIONS:
|
HEATMAP_FEATURES_CORRELATIONS:
|
||||||
|
|
|
@ -6,28 +6,6 @@ rule join_features_from_providers:
|
||||||
script:
|
script:
|
||||||
"../src/features/join_features_from_providers.R"
|
"../src/features/join_features_from_providers.R"
|
||||||
|
|
||||||
rule resample_episodes:
|
|
||||||
input:
|
|
||||||
"data/interim/{pid}/{sensor}_episodes.csv"
|
|
||||||
output:
|
|
||||||
"data/interim/{pid}/{sensor}_episodes_resampled.csv"
|
|
||||||
script:
|
|
||||||
"../src/features/utils/resample_episodes.R"
|
|
||||||
|
|
||||||
rule resample_episodes_with_datetime:
|
|
||||||
input:
|
|
||||||
sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv",
|
|
||||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
|
||||||
params:
|
|
||||||
timezones = None,
|
|
||||||
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
|
||||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
|
||||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
|
||||||
output:
|
|
||||||
"data/interim/{pid}/{sensor}_episodes_resampled_with_datetime.csv"
|
|
||||||
script:
|
|
||||||
"../src/data/readable_datetime.R"
|
|
||||||
|
|
||||||
rule phone_accelerometer_python_features:
|
rule phone_accelerometer_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
||||||
|
@ -234,48 +212,48 @@ rule phone_wifi_visible_r_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
rule fitbit_heartrate_features:
|
# rule fitbit_heartrate_features:
|
||||||
input:
|
# input:
|
||||||
heartrate_summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
# heartrate_summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
||||||
heartrate_intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
# heartrate_intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
||||||
params:
|
# params:
|
||||||
day_segment = "{day_segment}",
|
# day_segment = "{day_segment}",
|
||||||
summary_features = config["HEARTRATE"]["SUMMARY_FEATURES"],
|
# summary_features = config["HEARTRATE"]["SUMMARY_FEATURES"],
|
||||||
intraday_features = config["HEARTRATE"]["INTRADAY_FEATURES"]
|
# intraday_features = config["HEARTRATE"]["INTRADAY_FEATURES"]
|
||||||
output:
|
# output:
|
||||||
"data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
# "data/processed/{pid}/fitbit_heartrate_{day_segment}.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/features/fitbit_heartrate_features.py"
|
# "../src/features/fitbit_heartrate_features.py"
|
||||||
|
|
||||||
rule fitbit_step_features:
|
# rule fitbit_step_features:
|
||||||
input:
|
# input:
|
||||||
step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv",
|
# step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv",
|
||||||
sleep_data = optional_steps_sleep_input
|
# sleep_data = optional_steps_sleep_input
|
||||||
params:
|
# params:
|
||||||
day_segment = "{day_segment}",
|
# day_segment = "{day_segment}",
|
||||||
features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"],
|
# features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"],
|
||||||
features_sedentary_bout = config["STEP"]["FEATURES"]["SEDENTARY_BOUT"],
|
# features_sedentary_bout = config["STEP"]["FEATURES"]["SEDENTARY_BOUT"],
|
||||||
features_active_bout = config["STEP"]["FEATURES"]["ACTIVE_BOUT"],
|
# features_active_bout = config["STEP"]["FEATURES"]["ACTIVE_BOUT"],
|
||||||
threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"],
|
# threshold_active_bout = config["STEP"]["THRESHOLD_ACTIVE_BOUT"],
|
||||||
include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"],
|
# include_zero_step_rows = config["STEP"]["INCLUDE_ZERO_STEP_ROWS"],
|
||||||
exclude_sleep = config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"],
|
# exclude_sleep = config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"],
|
||||||
exclude_sleep_type = config["STEP"]["EXCLUDE_SLEEP"]["TYPE"],
|
# exclude_sleep_type = config["STEP"]["EXCLUDE_SLEEP"]["TYPE"],
|
||||||
exclude_sleep_fixed_start = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["START"],
|
# exclude_sleep_fixed_start = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["START"],
|
||||||
exclude_sleep_fixed_end = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["END"],
|
# exclude_sleep_fixed_end = config["STEP"]["EXCLUDE_SLEEP"]["FIXED"]["END"],
|
||||||
output:
|
# output:
|
||||||
"data/processed/{pid}/fitbit_step_{day_segment}.csv"
|
# "data/processed/{pid}/fitbit_step_{day_segment}.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/features/fitbit_step_features.py"
|
# "../src/features/fitbit_step_features.py"
|
||||||
|
|
||||||
rule fitbit_sleep_features:
|
# rule fitbit_sleep_features:
|
||||||
input:
|
# input:
|
||||||
sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
# sleep_summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||||
sleep_intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
# sleep_intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||||
params:
|
# params:
|
||||||
day_segment = "{day_segment}",
|
# day_segment = "{day_segment}",
|
||||||
summary_features = config["SLEEP"]["SUMMARY_FEATURES"],
|
# summary_features = config["SLEEP"]["SUMMARY_FEATURES"],
|
||||||
sleep_types = config["SLEEP"]["SLEEP_TYPES"]
|
# sleep_types = config["SLEEP"]["SLEEP_TYPES"]
|
||||||
output:
|
# output:
|
||||||
"data/processed/{pid}/fitbit_sleep_{day_segment}.csv"
|
# "data/processed/{pid}/fitbit_sleep_{day_segment}.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/features/fitbit_sleep_features.py"
|
# "../src/features/fitbit_sleep_features.py"
|
||||||
|
|
|
@ -3,7 +3,7 @@ rule restore_sql_file:
|
||||||
sql_file = "data/external/rapids_example.sql",
|
sql_file = "data/external/rapids_example.sql",
|
||||||
db_credentials = ".env"
|
db_credentials = ".env"
|
||||||
params:
|
params:
|
||||||
group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"]
|
group = config["DATABASE_GROUP"]
|
||||||
output:
|
output:
|
||||||
touch("data/interim/restore_sql_file.done")
|
touch("data/interim/restore_sql_file.done")
|
||||||
script:
|
script:
|
||||||
|
@ -15,28 +15,40 @@ rule create_example_participant_files:
|
||||||
shell:
|
shell:
|
||||||
"echo 'a748ee1a-1d0b-4ae9-9074-279a2b6ba524\nandroid\ntest01\n2020/04/23,2020/05/04\n' >> ./data/external/example01 && echo '13dbc8a3-dae3-4834-823a-4bc96a7d459d\nios\ntest02\n2020/04/23,2020/05/04\n' >> ./data/external/example02"
|
"echo 'a748ee1a-1d0b-4ae9-9074-279a2b6ba524\nandroid\ntest01\n2020/04/23,2020/05/04\n' >> ./data/external/example01 && echo '13dbc8a3-dae3-4834-823a-4bc96a7d459d\nios\ntest02\n2020/04/23,2020/05/04\n' >> ./data/external/example02"
|
||||||
|
|
||||||
rule download_participants:
|
# rule download_participants:
|
||||||
params:
|
# params:
|
||||||
group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
|
# group = config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
|
||||||
ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"],
|
# ignored_device_ids = config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"],
|
||||||
timezone = config["TIMEZONE"]
|
# timezone = config["TIMEZONE"]
|
||||||
priority: 1
|
# priority: 1
|
||||||
script:
|
# script:
|
||||||
"../src/data/download_participants.R"
|
# "../src/data/download_participants.R"
|
||||||
|
|
||||||
rule download_dataset:
|
rule download_phone_data:
|
||||||
input:
|
input:
|
||||||
"data/external/{pid}"
|
"data/external/participant_files/{pid}.yaml"
|
||||||
params:
|
params:
|
||||||
group = config["DOWNLOAD_DATASET"]["GROUP"],
|
source = config["SENSOR_DATA"]["PHONE"]["SOURCE"],
|
||||||
sensor = "{sensor}",
|
sensor = "phone_" + "{sensor}",
|
||||||
table = lambda wildcards: config[str(wildcards.sensor).upper()]["DB_TABLE"],
|
table = lambda wildcards: config["PHONE_" + str(wildcards.sensor).upper()]["TABLE"],
|
||||||
timezone = config["TIMEZONE"],
|
timezone = config["TIMEZONE"],
|
||||||
aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["IOS"],
|
aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["TABLE"]["IOS"],
|
||||||
output:
|
output:
|
||||||
"data/raw/{pid}/{sensor}_raw.csv"
|
"data/raw/{pid}/phone_{sensor}_raw.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/download_dataset.R"
|
"../src/data/download_phone_data.R"
|
||||||
|
|
||||||
|
rule download_fitbit_data:
|
||||||
|
input:
|
||||||
|
"data/external/participant_files/{pid}.yaml"
|
||||||
|
params:
|
||||||
|
source = config["SENSOR_DATA"]["FITBIT"]["SOURCE"],
|
||||||
|
sensor = "fitbit_" + "{sensor}",
|
||||||
|
table = lambda wildcards: config["FITBIT_" + str(wildcards.sensor).upper()]["TABLE"],
|
||||||
|
output:
|
||||||
|
"data/raw/{pid}/fitbit_{sensor}_raw.csv"
|
||||||
|
script:
|
||||||
|
"../src/data/download_fitbit_data.R"
|
||||||
|
|
||||||
rule compute_day_segments:
|
rule compute_day_segments:
|
||||||
input:
|
input:
|
||||||
|
@ -55,8 +67,8 @@ rule phone_readable_datetime:
|
||||||
sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
|
sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
|
||||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||||
params:
|
params:
|
||||||
timezones = None,
|
timezones = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["TYPE"],
|
||||||
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
fixed_timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||||
output:
|
output:
|
||||||
|
@ -97,7 +109,7 @@ rule phone_valid_sensed_days:
|
||||||
rule unify_ios_android:
|
rule unify_ios_android:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
||||||
participant_info = "data/external/{pid}"
|
participant_info = "data/external/participant_files/{pid}.yaml"
|
||||||
params:
|
params:
|
||||||
sensor = "{sensor}",
|
sensor = "{sensor}",
|
||||||
output:
|
output:
|
||||||
|
@ -105,7 +117,7 @@ rule unify_ios_android:
|
||||||
script:
|
script:
|
||||||
"../src/data/unify_ios_android.R"
|
"../src/data/unify_ios_android.R"
|
||||||
|
|
||||||
rule process_phone_location_types:
|
rule process_phone_locations_types:
|
||||||
input:
|
input:
|
||||||
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
||||||
phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
|
phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
|
||||||
|
@ -118,13 +130,13 @@ rule process_phone_location_types:
|
||||||
script:
|
script:
|
||||||
"../src/data/process_location_types.R"
|
"../src/data/process_location_types.R"
|
||||||
|
|
||||||
rule readable_datetime_location_processed:
|
rule phone_locations_processed_with_datetime:
|
||||||
input:
|
input:
|
||||||
sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
|
sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
|
||||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||||
params:
|
params:
|
||||||
timezones = None,
|
timezones = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["TYPE"],
|
||||||
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
fixed_timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||||
output:
|
output:
|
||||||
|
@ -132,6 +144,28 @@ rule readable_datetime_location_processed:
|
||||||
script:
|
script:
|
||||||
"../src/data/readable_datetime.R"
|
"../src/data/readable_datetime.R"
|
||||||
|
|
||||||
|
rule resample_episodes:
|
||||||
|
input:
|
||||||
|
"data/interim/{pid}/{sensor}_episodes.csv"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/{sensor}_episodes_resampled.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/utils/resample_episodes.R"
|
||||||
|
|
||||||
|
rule resample_episodes_with_datetime:
|
||||||
|
input:
|
||||||
|
sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv",
|
||||||
|
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||||
|
params:
|
||||||
|
timezones = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["TYPE"],
|
||||||
|
fixed_timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
|
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||||
|
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/{sensor}_episodes_resampled_with_datetime.csv"
|
||||||
|
script:
|
||||||
|
"../src/data/readable_datetime.R"
|
||||||
|
|
||||||
rule phone_application_categories:
|
rule phone_application_categories:
|
||||||
input:
|
input:
|
||||||
"data/raw/{pid}/phone_applications_foreground_with_datetime.csv"
|
"data/raw/{pid}/phone_applications_foreground_with_datetime.csv"
|
||||||
|
@ -145,37 +179,37 @@ rule phone_application_categories:
|
||||||
script:
|
script:
|
||||||
"../src/data/application_categories.R"
|
"../src/data/application_categories.R"
|
||||||
|
|
||||||
rule fitbit_heartrate_with_datetime:
|
# rule fitbit_heartrate_with_datetime:
|
||||||
input:
|
# input:
|
||||||
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["DB_TABLE"])
|
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["TABLE"])
|
||||||
params:
|
# params:
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||||
fitbit_sensor = "heartrate"
|
# fitbit_sensor = "heartrate"
|
||||||
output:
|
# output:
|
||||||
summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
# summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
|
||||||
intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
# intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/data/fitbit_readable_datetime.py"
|
# "../src/data/fitbit_readable_datetime.py"
|
||||||
|
|
||||||
rule fitbit_step_with_datetime:
|
# rule fitbit_step_with_datetime:
|
||||||
input:
|
# input:
|
||||||
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["DB_TABLE"])
|
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["TABLE"])
|
||||||
params:
|
# params:
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||||
fitbit_sensor = "steps"
|
# fitbit_sensor = "steps"
|
||||||
output:
|
# output:
|
||||||
intraday_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
|
# intraday_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/data/fitbit_readable_datetime.py"
|
# "../src/data/fitbit_readable_datetime.py"
|
||||||
|
|
||||||
rule fitbit_sleep_with_datetime:
|
# rule fitbit_sleep_with_datetime:
|
||||||
input:
|
# input:
|
||||||
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["DB_TABLE"])
|
# expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["TABLE"])
|
||||||
params:
|
# params:
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
# local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||||
fitbit_sensor = "sleep"
|
# fitbit_sensor = "sleep"
|
||||||
output:
|
# output:
|
||||||
summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
# summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
|
||||||
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
# intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
|
||||||
script:
|
# script:
|
||||||
"../src/data/fitbit_readable_datetime.py"
|
# "../src/data/fitbit_readable_datetime.py"
|
||||||
|
|
|
@ -66,7 +66,7 @@ rule overall_compliance_heatmap:
|
||||||
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
||||||
params:
|
params:
|
||||||
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
|
only_show_valid_days = config["OVERALL_COMPLIANCE_HEATMAP"]["ONLY_SHOW_VALID_DAYS"],
|
||||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
local_timezone = config["SENSOR_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
|
expected_num_of_days = config["OVERALL_COMPLIANCE_HEATMAP"]["EXPECTED_NUM_OF_DAYS"],
|
||||||
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
|
bin_size = config["OVERALL_COMPLIANCE_HEATMAP"]["BIN_SIZE"],
|
||||||
min_bins_per_hour = "{min_valid_bins_per_hour}"
|
min_bins_per_hour = "{min_valid_bins_per_hour}"
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
library(RMySQL)
|
||||||
|
library(dplyr)
|
||||||
|
library(readr)
|
||||||
|
library(stringr)
|
||||||
|
library(yaml)
|
||||||
|
|
||||||
|
|
||||||
|
participant_file <- snakemake@input[[1]]
|
||||||
|
source <- snakemake@params[["source"]]
|
||||||
|
sensor <- snakemake@params[["sensor"]]
|
||||||
|
table <- snakemake@params[["table"]]
|
||||||
|
sensor_file <- snakemake@output[[1]]
|
||||||
|
|
||||||
|
participant <- read_yaml(participant_file)
|
||||||
|
if(! "FITBIT" %in% names(participant)){
|
||||||
|
stop(paste("The following participant file does not have a FITBIT section, create one manually or automatically (see the docs):", participant_file))
|
||||||
|
}
|
||||||
|
device_ids <- participant$FITBIT$DEVICE_IDS
|
||||||
|
unified_device_id <- tail(device_ids, 1)
|
||||||
|
# As opposed to phone data, we don't filter by date here because data can still be in JSON format; we need to parse it first
|
||||||
|
|
||||||
|
if(source$TYPE == "DATABASE"){
|
||||||
|
dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = source$DATABASE_GROUP)
|
||||||
|
query <- paste0("SELECT * FROM ", table, " WHERE ",source$DEVICE_ID_COLUMN," IN ('", paste0(device_ids, collapse = "','"), "')")
|
||||||
|
sensor_data <- dbGetQuery(dbEngine, query)
|
||||||
|
dbDisconnect(dbEngine)
|
||||||
|
sensor_data <- sensor_data %>%
|
||||||
|
rename(device_id = source$DEVICE_ID_COLUMN) %>%
|
||||||
|
mutate(device_id = unified_device_id) # Unify device_id
|
||||||
|
|
||||||
|
if(FALSE) # For MoSHI use, we didn't split fitbit sensors into different tables
|
||||||
|
sensor_data <- sensor_data %>% filter(fitbit_data_type == str_split(sensor, "_", simplify = TRUE)[[2]])
|
||||||
|
|
||||||
|
# Dropping duplicates on all columns except for _id or id
|
||||||
|
sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))
|
||||||
|
|
||||||
|
write_csv(sensor_data, sensor_file)
|
||||||
|
|
||||||
|
}
|
|
@ -4,6 +4,9 @@ library(RMySQL)
|
||||||
library(stringr)
|
library(stringr)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
library(readr)
|
library(readr)
|
||||||
|
library(yaml)
|
||||||
|
library(lubridate)
|
||||||
|
options(scipen=999)
|
||||||
|
|
||||||
validate_deviceid_platforms <- function(device_ids, platforms){
|
validate_deviceid_platforms <- function(device_ids, platforms){
|
||||||
if(length(device_ids) == 1){
|
if(length(device_ids) == 1){
|
||||||
|
@ -37,38 +40,57 @@ is_multiplaform_participant <- function(dbEngine, device_ids, platforms){
|
||||||
return(FALSE)
|
return(FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
participant <- snakemake@input[[1]]
|
get_timestamp_filter <- function(device_ids, participant, timezone){
|
||||||
group <- snakemake@params[["group"]]
|
# Read start and end date from the participant file to filter data within that range
|
||||||
|
start_date <- ymd_hms(paste(participant$PHONE$START_DATE,"00:00:00"), tz=timezone, quiet=TRUE)
|
||||||
|
end_date <- ymd_hms(paste(participant$PHONE$END_DATE, "23:59:59"), tz=timezone, quiet=TRUE)
|
||||||
|
start_timestamp = as.numeric(start_date) * 1000
|
||||||
|
end_timestamp = as.numeric(end_date) * 1000
|
||||||
|
if(is.na(start_timestamp)){
|
||||||
|
message(paste("PHONE[START_DATE] was not provided or failed to parse (", participant$PHONE$START_DATE,"), all data for", paste0(device_ids, collapse=","),"is returned"))
|
||||||
|
return("")
|
||||||
|
}else if(is.na(end_timestamp)){
|
||||||
|
message(paste("PHONE[END_DATE] was not provided or failed to parse (", participant$PHONE$END_DATE,"), all data for", paste0(device_ids, collapse=","),"is returned"))
|
||||||
|
return("")
|
||||||
|
} else if(start_timestamp > end_timestamp){
|
||||||
|
stop(paste("Start date has to be before end date in PHONE[TIME_SPAN] (",start_date,",", date(end_date),"), all data for", paste0(device_ids, collapse=","),"is returned"))
|
||||||
|
return("")
|
||||||
|
} else {
|
||||||
|
message(paste("Filtering data between", start_date, "and", end_date, "in", timezone, "for",paste0(device_ids, collapse=",")))
|
||||||
|
return(paste0("AND timestamp BETWEEN ", start_timestamp, " AND ", end_timestamp))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
participant_file <- snakemake@input[[1]]
|
||||||
|
source <- snakemake@params[["source"]]
|
||||||
|
group <- source$DATABASE_GROUP
|
||||||
table <- snakemake@params[["table"]]
|
table <- snakemake@params[["table"]]
|
||||||
sensor <- snakemake@params[["sensor"]]
|
sensor <- snakemake@params[["sensor"]]
|
||||||
timezone <- snakemake@params[["timezone"]]
|
timezone <- snakemake@params[["timezone"]]
|
||||||
aware_multiplatform_tables <- str_split(snakemake@params[["aware_multiplatform_tables"]], ",")[[1]]
|
aware_multiplatform_tables <- str_split(snakemake@params[["aware_multiplatform_tables"]], ",")[[1]]
|
||||||
sensor_file <- snakemake@output[[1]]
|
sensor_file <- snakemake@output[[1]]
|
||||||
|
|
||||||
device_ids <- strsplit(readLines(participant, n=1), ",")[[1]]
|
participant <- read_yaml(participant_file)
|
||||||
|
if(! "PHONE" %in% names(participant)){
|
||||||
|
stop(paste("The following participant file does not have a PHONE section, create one manually or automatically (see the docs):", participant_file))
|
||||||
|
}
|
||||||
|
device_ids <- participant$PHONE$DEVICE_IDS
|
||||||
unified_device_id <- tail(device_ids, 1)
|
unified_device_id <- tail(device_ids, 1)
|
||||||
platforms <- strsplit(readLines(participant, n=2)[[2]], ",")[[1]]
|
platforms <- participant$PHONE$PLATFORMS
|
||||||
validate_deviceid_platforms(device_ids, platforms)
|
validate_deviceid_platforms(device_ids, platforms)
|
||||||
|
timestamp_filter <- get_timestamp_filter(device_ids, participant, timezone)
|
||||||
# Read start and end date from the participant file to filter data within that range
|
|
||||||
start_date <- strsplit(readLines(participant, n=4)[4], ",")[[1]][1]
|
|
||||||
end_date <- strsplit(readLines(participant, n=4)[4], ",")[[1]][2]
|
|
||||||
start_datetime_utc = format(as.POSIXct(paste0(start_date, " 00:00:00"),format="%Y/%m/%d %H:%M:%S",origin="1970-01-01",tz=timezone), tz="UTC")
|
|
||||||
end_datetime_utc = format(as.POSIXct(paste0(end_date, " 23:59:59"),format="%Y/%m/%d %H:%M:%S",origin="1970-01-01",tz=timezone), tz="UTC")
|
|
||||||
|
|
||||||
dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = group)
|
dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = group)
|
||||||
|
|
||||||
if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
|
if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
|
||||||
sensor_data <- unify_raw_data(dbEngine, table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms)
|
sensor_data <- unify_raw_data(dbEngine, table, sensor, timestamp_filter, aware_multiplatform_tables, device_ids, platforms)
|
||||||
}else {
|
}else {
|
||||||
# table has two elements for conversation and activity recognition (they store data on a different table for ios and android)
|
# table has two elements for conversation and activity recognition (they store data on a different table for ios and android)
|
||||||
if(length(table) > 1){
|
if(length(table) > 1)
|
||||||
table <- table[[toupper(platforms[1])]]
|
table <- table[[toupper(platforms[1])]]
|
||||||
}
|
query <- paste0("SELECT * FROM ", table, " WHERE ",source$DEVICE_ID_COLUMN," IN ('", paste0(device_ids, collapse = "','"), "')", timestamp_filter)
|
||||||
query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
|
sensor_data <- dbGetQuery(dbEngine, query) %>%
|
||||||
if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
|
rename(device_id = source$DEVICE_ID_COLUMN)
|
||||||
query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
|
|
||||||
sensor_data <- dbGetQuery(dbEngine, query)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sensor_data <- sensor_data %>% arrange(timestamp)
|
sensor_data <- sensor_data %>% arrange(timestamp)
|
|
@ -1,11 +1,13 @@
|
||||||
source("renv/activate.R")
|
source("renv/activate.R")
|
||||||
source("src/data/unify_utils.R")
|
source("src/data/unify_utils.R")
|
||||||
|
library(yaml)
|
||||||
|
|
||||||
sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
|
sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
|
||||||
participant_info <- snakemake@input[["participant_info"]]
|
participant_info <- snakemake@input[["participant_info"]]
|
||||||
sensor <- snakemake@params[["sensor"]]
|
sensor <- snakemake@params[["sensor"]]
|
||||||
|
|
||||||
platforms <- strsplit(readLines(participant_info, n=2)[[2]], ",")[[1]]
|
participant <- read_yaml(participant_info)
|
||||||
|
platforms <- participant$PHONE$PLATFORMS
|
||||||
platform <- ifelse(platforms[1] == "multiple" | (length(platforms) > 1 & "android" %in% platforms & "ios" %in% platforms), "android", platforms[1])
|
platform <- ifelse(platforms[1] == "multiple" | (length(platforms) > 1 & "android" %in% platforms & "ios" %in% platforms), "android", platforms[1])
|
||||||
|
|
||||||
sensor_data <- unify_data(sensor_data, sensor, platform)
|
sensor_data <- unify_data(sensor_data, sensor, platform)
|
||||||
|
|
|
@ -138,7 +138,7 @@ unify_ios_conversation <- function(conversation){
|
||||||
}
|
}
|
||||||
|
|
||||||
# This function is used in download_dataset.R
|
# This function is used in download_dataset.R
|
||||||
unify_raw_data <- function(dbEngine, sensor_table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms){
|
unify_raw_data <- function(dbEngine, sensor_table, sensor, timestamp_filter, aware_multiplatform_tables, device_ids, platforms){
|
||||||
# If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
|
# If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
|
||||||
if(length(platforms) == 1 && platforms == "multiple")
|
if(length(platforms) == 1 && platforms == "multiple")
|
||||||
devices_platforms <- dbGetQuery(dbEngine, paste0("SELECT device_id,brand FROM aware_device WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")) %>%
|
devices_platforms <- dbGetQuery(dbEngine, paste0("SELECT device_id,brand FROM aware_device WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")) %>%
|
||||||
|
@ -169,10 +169,7 @@ unify_raw_data <- function(dbEngine, sensor_table, sensor, start_datetime_utc, e
|
||||||
table <- conversation_tables[[platform]]
|
table <- conversation_tables[[platform]]
|
||||||
|
|
||||||
if(table %in% available_tables_in_db){
|
if(table %in% available_tables_in_db){
|
||||||
query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", device_id, "')")
|
query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", device_id, "')", timestamp_filter)
|
||||||
if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
|
|
||||||
query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
|
|
||||||
}
|
|
||||||
sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
|
sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
|
||||||
participants_sensordata <- append(participants_sensordata, list(sensor_data))
|
participants_sensordata <- append(participants_sensordata, list(sensor_data))
|
||||||
}else{
|
}else{
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
import yaml, os
|
||||||
|
import sys
|
||||||
|
# Converts legacy participant files (plain text, no extension, stored directly
# in data/external/) into the YAML participant files that the download rules
# read from data/external/participant_files/{pid}.yaml.
#
# Legacy layout, one value per line:
#   line 1: device ids
#   line 2: platforms
#   line 3: label
#   line 4: start_date,end_date

LEGACY_DIR = Path(r'data/external/')
OUTPUT_DIR = Path(r'data/external/participant_files/')

# Order of the first three legacy lines; the fourth line carries both dates.
_SINGLE_VALUE_KEYS = ("DEVICE_IDS", "PLATFORMS", "LABEL")


def find_legacy_participant_files(directory):
    """Return the extension-less regular files found directly in *directory*.

    Files with a suffix or a dot anywhere in their name (e.g. ``.env`` or
    ``rapids_example.sql``) are skipped, as are sub-directories.
    """
    return [
        x for x in Path(directory).glob('*')
        if x.is_file() and x.suffix == "" and "." not in x.stem
    ]


def parse_legacy_participant(lines):
    """Build the participant fields from the lines of a legacy file.

    Missing or empty lines leave the corresponding field as an empty string.
    The defaults use the same ``DEVICE_IDS`` key that is read back later, so
    a file with an empty first line no longer raises ``KeyError``.  A date
    line without a comma yields an empty ``END_DATE`` instead of raising
    ``IndexError``.
    """
    phone = {"DEVICE_IDS": "", "PLATFORMS": "", "LABEL": "", "START_DATE": "", "END_DATE": ""}

    # zip() stops at the shorter sequence, so short files are handled for free.
    for key, line in zip(_SINGLE_VALUE_KEYS, lines):
        if line:
            phone[key] = line

    if len(lines) >= 4 and lines[3]:
        dates = lines[3].split(",")
        phone["START_DATE"] = dates[0]
        phone["END_DATE"] = dates[1] if len(dates) > 1 else ""

    return phone


def render_participant_yaml(phone):
    """Return the text of the new participant file for the given fields.

    The FITBIT section is seeded with the phone's device ids, label and dates.
    """
    return "".join([
        "PHONE:\n",
        " DEVICE_IDS: [{}]\n".format(phone["DEVICE_IDS"]),
        " PLATFORMS: [{}]\n".format(phone["PLATFORMS"]),
        " LABEL: {}\n".format(phone["LABEL"]),
        " START_DATE: {}\n".format(phone["START_DATE"]),
        " END_DATE: {}\n".format(phone["END_DATE"]),
        "FITBIT:\n",
        " DEVICE_IDS: [{}]\n".format(phone["DEVICE_IDS"]),
        " LABEL: {}\n".format(phone["LABEL"]),
        " START_DATE: {}\n".format(phone["START_DATE"]),
        " END_DATE: {}\n".format(phone["END_DATE"]),
    ])


def main():
    """Migrate every legacy participant file and report which were processed."""
    files = find_legacy_participant_files(LEGACY_DIR)
    for file in files:
        # read_text() opens and closes the file; the original left it open.
        phone = parse_legacy_participant(file.read_text().splitlines())
        new_participant_file = OUTPUT_DIR / (file.stem + ".yaml")
        new_participant_file.parent.mkdir(parents=True, exist_ok=True)
        new_participant_file.write_text(render_participant_yaml(phone))

    print("Processed files:")
    print(list(map(str, files)))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue