2020-08-03 23:30:15 +02:00
|
|
|
# Writes two example participant files (example01: android, example02: ios),
# each holding a PHONE and a FITBIT section, under data/external/participant_files/.
rule create_example_participant_files:
    output:
        expand("data/external/participant_files/{pid}.yaml", pid=["example01", "example02"]),
    # The files are written with a truncating redirection (">") instead of an
    # appending one (">>"): appending to a stale or manually created file would
    # produce a YAML document with duplicated top-level keys.
    # The command is split into two adjacent literals (one per file); they are
    # concatenated into a single shell command.
    shell:
        "echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' > ./data/external/participant_files/example01.yaml && "
        "echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' > ./data/external/participant_files/example02.yaml"
|
2020-08-03 23:30:15 +02:00
|
|
|
|
2022-07-07 17:00:47 +02:00
|
|
|
# rule query_usernames_device_empatica_ids:
|
|
|
|
# params:
|
|
|
|
# baseline_folder = "/mnt/e/STRAWbaseline/"
|
|
|
|
# output:
|
|
|
|
# usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
|
|
|
# timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
|
|
|
# script:
|
|
|
|
# "../../participants/prepare_usernames_file.py"
|
|
|
|
|
|
|
|
# Produces the TZCODES_FILE configured under TIMEZONE.MULTIPLE from the
# configured TZ_FILE by running create_multi_timezones_file.py.
rule prepare_tzcodes_file:
    input:
        timezone_file=config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"],
    output:
        tzcodes_file=config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"],
    script:
        "../tools/create_multi_timezones_file.py"
|
|
|
|
|
|
|
|
# Turns the configured usernames CSV into the participants CSV
# (CREATE_PARTICIPANT_FILES.CSV_FILE_PATH) via an R script.
rule prepare_participants_csv:
    input:
        username_list=config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
    params:
        # Configuration of whichever phone data stream is selected by "USE".
        data_configuration=config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
        # Table names handed to the script; device ids and start/end dates
        # both come from the "esm" table.
        participants_table="participants",
        device_id_table="esm",
        start_end_date_table="esm",
    output:
        participants_file=config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"],
    script:
        "../src/data/translate_usernames_into_participants_data.R"
|
|
|
|
|
2020-10-27 22:13:16 +01:00
|
|
|
# Runs create_participants_files.R on the participants CSV. The rule declares
# no output, so the script itself is responsible for whatever it writes.
rule create_participants_files:
    input:
        participants_file=config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"],
    params:
        # Whole CREATE_PARTICIPANT_FILES section, exposed to the script as "config".
        config=config["CREATE_PARTICIPANT_FILES"],
    script:
        "../src/data/create_participants_files.R"
|
2020-10-21 01:12:01 +02:00
|
|
|
|
2021-03-08 21:58:26 +01:00
|
|
|
# Pulls the raw data of one phone sensor for one participant into
# data/raw/{pid}/phone_{sensor}_raw.csv.
rule pull_phone_data:
    # Inputs are computed by a helper defined outside this section.
    input:
        unpack(pull_phone_data_input_with_mutation_scripts)
    params:
        # Configuration of whichever phone data stream is selected by "USE".
        data_configuration=config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
        sensor="phone_{sensor}",
        # CONTAINER entry of the PHONE_<SENSOR> config section.
        tables=lambda wildcards: config[f"PHONE_{str(wildcards.sensor).upper()}"]["CONTAINER"],
    output:
        "data/raw/{pid}/phone_{sensor}_raw.csv"
    script:
        "../src/data/streams/pull_phone_data.R"
|
2020-02-10 22:45:34 +01:00
|
|
|
|
2021-03-23 00:15:13 +01:00
|
|
|
# Derives the per-participant time-segments file and its labels file from the
# configured segments file and the participant's YAML file.
rule process_time_segments:
    input:
        segments_file=config["TIME_SEGMENTS"]["FILE"],
        participant_file="data/external/participant_files/{pid}.yaml",
    params:
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        pid="{pid}",
    output:
        segments_file="data/interim/time_segments/{pid}_time_segments.csv",
        segments_labels_file="data/interim/time_segments/{pid}_time_segments_labels.csv",
    script:
        "../src/data/datetime/process_time_segments.R"
|
2020-07-23 03:54:19 +02:00
|
|
|
|
2020-10-19 21:07:12 +02:00
|
|
|
# Runs readable_datetime.R on the raw CSV of a phone sensor, producing
# data/raw/{pid}/phone_{sensor}_with_datetime.csv.
rule phone_readable_datetime:
    input:
        sensor_input="data/raw/{pid}/phone_{sensor}_raw.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        device_type="phone",
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/raw/{pid}/phone_{sensor}_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|
2019-11-05 18:34:22 +01:00
|
|
|
|
2020-11-25 01:12:16 +01:00
|
|
|
# Combines the raw CSVs of every sensor listed in PHONE_DATA_YIELD.SENSORS
# into data/interim/{pid}/phone_yielded_timestamps.csv.
rule phone_yielded_timestamps:
    input:
        # One raw file per configured sensor; sensor names are lower-cased to
        # build the file names, and {pid} stays a wildcard.
        all_sensors=expand(
            "data/raw/{{pid}}/{sensor}_raw.csv",
            sensor=[str.lower(name) for name in config["PHONE_DATA_YIELD"]["SENSORS"]],
        ),
    params:
        # Not read by the script; present so a change to the sensor list re-triggers the rule.
        sensors=config["PHONE_DATA_YIELD"]["SENSORS"],
    output:
        "data/interim/{pid}/phone_yielded_timestamps.csv"
    script:
        "../src/data/phone_yielded_timestamps.R"
|
2019-11-12 20:57:27 +01:00
|
|
|
|
2020-11-25 01:12:16 +01:00
|
|
|
# Runs readable_datetime.R on the yielded-timestamps CSV, producing
# data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv.
rule phone_yielded_timestamps_with_datetime:
    input:
        sensor_input="data/interim/{pid}/phone_yielded_timestamps.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        device_type="phone",
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|
2020-10-07 17:51:31 +02:00
|
|
|
|
2019-11-12 20:57:27 +01:00
|
|
|
# Runs unify_ios_android.R on a sensor's with-datetime CSV together with the
# participant file, producing the "_with_datetime_unified" CSV.
rule unify_ios_android:
    input:
        sensor_data="data/raw/{pid}/{sensor}_with_datetime.csv",
        participant_info="data/external/participant_files/{pid}.yaml",
    params:
        sensor="{sensor}",
    output:
        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
    script:
        "../src/data/unify_ios_android.R"
|
|
|
|
|
2020-10-21 01:12:01 +02:00
|
|
|
# Processes raw phone locations according to the PHONE_LOCATIONS settings,
# producing data/interim/{pid}/phone_locations_processed.csv.
rule process_phone_locations_types:
    input:
        locations="data/raw/{pid}/phone_locations_raw.csv",
        # Resolved by a helper defined outside this section.
        phone_sensed_timestamps=optional_phone_yield_input_for_locations,
    params:
        consecutive_threshold=config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
        time_since_valid_location=config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
        locations_to_use=config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"],
        accuracy_limit=config["PHONE_LOCATIONS"]["ACCURACY_LIMIT"],
    output:
        "data/interim/{pid}/phone_locations_processed.csv"
    script:
        "../src/data/process_location_types.R"
|
2020-01-15 23:18:10 +01:00
|
|
|
|
2020-10-21 01:12:01 +02:00
|
|
|
# Runs readable_datetime.R on the processed locations CSV, producing
# data/interim/{pid}/phone_locations_processed_with_datetime.csv.
rule phone_locations_processed_with_datetime:
    input:
        sensor_input="data/interim/{pid}/phone_locations_processed.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        device_type="phone",
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|
2020-10-07 17:51:31 +02:00
|
|
|
|
2020-10-21 01:12:01 +02:00
|
|
|
# Runs resample_episodes.R on a sensor's episodes CSV, producing the
# "_episodes_resampled" CSV for the same participant and sensor.
rule resample_episodes:
    input:
        "data/interim/{pid}/{sensor}_episodes.csv"
    output:
        "data/interim/{pid}/{sensor}_episodes_resampled.csv"
    script:
        "../src/features/utils/resample_episodes.R"
|
2020-01-16 00:28:56 +01:00
|
|
|
|
2020-10-21 01:12:01 +02:00
|
|
|
# Runs readable_datetime.R on a sensor's resampled episodes, producing the
# "_episodes_resampled_with_datetime" CSV.
rule resample_episodes_with_datetime:
    input:
        sensor_input="data/interim/{pid}/{sensor}_episodes_resampled.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        # Device type is the part of the sensor wildcard before its first underscore.
        device_type=lambda wildcards: wildcards.sensor.partition("_")[0],
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/interim/{pid}/{sensor}_episodes_resampled_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|
|
|
|
|
2020-10-21 01:12:01 +02:00
|
|
|
# Runs application_categories.R on a phone_applications_{type} with-datetime
# CSV, producing the "_with_datetime_with_categories" CSV.
rule phone_application_categories:
    input:
        "data/raw/{pid}/phone_applications_{type}_with_datetime.csv"
    params:
        # Every value is read from the APPLICATION_CATEGORIES block of the
        # PHONE_APPLICATIONS_<TYPE> config section matching the {type} wildcard.
        catalogue_source=lambda wildcards: config[
            f"PHONE_APPLICATIONS_{str(wildcards.type).upper()}"
        ]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"],
        catalogue_file=lambda wildcards: config[
            f"PHONE_APPLICATIONS_{str(wildcards.type).upper()}"
        ]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"],
        update_catalogue_file=lambda wildcards: config[
            f"PHONE_APPLICATIONS_{str(wildcards.type).upper()}"
        ]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"],
        # The param is named "genres" while the config key is "CATEGORIES".
        scrape_missing_genres=lambda wildcards: config[
            f"PHONE_APPLICATIONS_{str(wildcards.type).upper()}"
        ]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"],
    output:
        "data/raw/{pid}/phone_applications_{type}_with_datetime_with_categories.csv"
    script:
        "../src/data/application_categories.R"
|
2020-06-23 17:33:34 +02:00
|
|
|
|
2021-03-09 22:42:02 +01:00
|
|
|
# Pulls the raw data of one wearable sensor (empatica or fitbit) for one
# participant into data/raw/{pid}/{device_type}_{sensor}_raw.csv.
rule pull_wearable_data:
    # Inputs are computed by a helper defined outside this section.
    input:
        unpack(pull_wearable_data_input_with_mutation_scripts)
    params:
        # Configuration of whichever <DEVICE_TYPE>_DATA_STREAMS entry is selected by "USE".
        data_configuration=lambda wildcards: config[
            f"{wildcards.device_type.upper()}_DATA_STREAMS"
        ][config[f"{wildcards.device_type.upper()}_DATA_STREAMS"]["USE"]],
        device_type="{device_type}",
        sensor="{device_type}_{sensor}",
        pid="{pid}",
        # CONTAINER entry of the <DEVICE_TYPE>_<SENSOR> config section.
        tables=lambda wildcards: config[
            f"{wildcards.device_type.upper()}_{str(wildcards.sensor).upper()}"
        ]["CONTAINER"],
    # Restricts this rule to the two wearable device types.
    wildcard_constraints:
        device_type="(empatica|fitbit)"
    output:
        "data/raw/{pid}/{device_type}_{sensor}_raw.csv"
    script:
        "../src/data/streams/pull_wearable_data.R"
|
2020-10-21 01:12:01 +02:00
|
|
|
|
2021-03-09 17:20:21 +01:00
|
|
|
# Runs readable_datetime.R on the raw CSV of a Fitbit sensor, producing
# data/raw/{pid}/fitbit_{sensor}_with_datetime.csv.
rule fitbit_readable_datetime:
    input:
        sensor_input="data/raw/{pid}/fitbit_{sensor}_raw.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        device_type="fitbit",
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/raw/{pid}/fitbit_{sensor}_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|
2020-11-23 18:01:00 +01:00
|
|
|
|
2021-05-19 00:27:12 +02:00
|
|
|
# Runs fitbit_steps_intraday_exclude_sleep.py on the intraday steps CSV,
# passing the FITBIT_STEPS_INTRADAY.EXCLUDE_SLEEP settings to the script.
rule fitbit_steps_intraday_exclude_sleep:
    input:
        sensor_data="data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv",
        # Resolved by a helper defined outside this section.
        sleep_data=optional_steps_sleep_input,
    params:
        exclude_sleep=config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"],
    output:
        "data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv"
    script:
        "../src/data/fitbit_steps_intraday_exclude_sleep.py"
|
|
|
|
|
2020-12-15 02:30:34 +01:00
|
|
|
# Runs readable_datetime.R on the raw CSV of an Empatica sensor, producing
# data/raw/{pid}/empatica_{sensor}_with_datetime.csv.
rule empatica_readable_datetime:
    input:
        sensor_input="data/raw/{pid}/empatica_{sensor}_raw.csv",
        time_segments="data/interim/time_segments/{pid}_time_segments.csv",
        pid_file="data/external/participant_files/{pid}.yaml",
        # Resolved by a helper defined outside this section.
        tzcodes_file=input_tzcodes_file,
    params:
        device_type="empatica",
        timezone_parameters=config["TIMEZONE"],
        pid="{pid}",
        time_segments_type=config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments=config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"],
    output:
        "data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"
|