# Writes two example participant files (example01: android, example02: ios),
# each containing a PHONE and a FITBIT section, by echoing literal YAML text.
# NOTE(review): nested YAML keys are indented with a single space inside the
# echoed text — confirm this matches what downstream readers expect.
rule create_example_participant_files:
    output:
        expand("data/external/participant_files/{pid}.yaml", pid = ["example01", "example02"])
    shell:
        "echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"

# Disabled rule, kept for reference.
# rule query_usernames_device_empatica_ids:
#     params:
#         baseline_folder = "/mnt/e/STRAWbaseline/"
#     output:
#         usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
#         timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
#     script:
#         "../../participants/prepare_usernames_file.py"

# Produces the TZCODES_FILE from the multi-timezone TZ_FILE; both paths come
# from config["TIMEZONE"]["MULTIPLE"].
rule prepare_tzcodes_file:
    input:
        timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
    output:
        tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
    script:
        "../tools/create_multi_timezones_file.py"

# Turns the usernames CSV into the participants CSV. The script receives the
# phone data stream selected by PHONE_DATA_STREAMS.USE plus three table names.
rule prepare_participants_csv:
    input:
        username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]
    params:
        data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
        participants_table = "participants",
        device_id_table = "esm",
        start_end_date_table = "esm"
    output:
        participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
    script:
        "../src/data/translate_usernames_into_participants_data.R"

# Runs create_participants_files.R on the participants CSV. This rule declares
# no output; the whole CREATE_PARTICIPANT_FILES config section is passed along.
rule create_participants_files:
    input:
        participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"]
    params:
        config = config["CREATE_PARTICIPANT_FILES"]
    script:
        "../src/data/create_participants_files.R"

# Pulls raw data for one phone sensor of one participant. Inputs are supplied
# by the pull_phone_data_input_with_mutation_scripts function (defined outside
# this section); `tables` is the CONTAINER entry of the PHONE_<SENSOR> config.
rule pull_phone_data:
    input:
        unpack(pull_phone_data_input_with_mutation_scripts)
    params:
        data_configuration = config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]],
        sensor = "phone_" + "{sensor}",
        tables = lambda wildcards: config["PHONE_" + str(wildcards.sensor).upper()]["CONTAINER"],
    output:
        "data/raw/{pid}/phone_{sensor}_raw.csv"
    script:
        "../src/data/streams/pull_phone_data.R"

# Builds the per-participant time segments file and its labels file from the
# configured segments file and the participant's YAML file.
rule process_time_segments:
    input:
        segments_file = config["TIME_SEGMENTS"]["FILE"],
        participant_file = "data/external/participant_files/{pid}.yaml"
    params:
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        pid = "{pid}"
    output:
        segments_file = "data/interim/time_segments/{pid}_time_segments.csv",
        segments_labels_file = "data/interim/time_segments/{pid}_time_segments_labels.csv",
    script:
        "../src/data/datetime/process_time_segments.R"

# Runs readable_datetime.R on a raw phone sensor file with device_type "phone".
rule phone_readable_datetime:
    input:
        sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "phone",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/raw/{pid}/phone_{sensor}_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

# Combines the raw files of every sensor listed in PHONE_DATA_YIELD.SENSORS
# (names lower-cased to form the file names) into one yielded-timestamps file.
rule phone_yielded_timestamps:
    input:
        all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"]))
    params:
        sensors = config["PHONE_DATA_YIELD"]["SENSORS"] # not used but needed so the rule is triggered if this array changes
    output:
        "data/interim/{pid}/phone_yielded_timestamps.csv"
    script:
        "../src/data/phone_yielded_timestamps.R"

# Runs readable_datetime.R on the yielded-timestamps file (device_type "phone").
rule phone_yielded_timestamps_with_datetime:
    input:
        sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "phone",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

# Runs unify_ios_android.R on a sensor's with-datetime file, using the
# participant YAML file as additional input.
rule unify_ios_android:
    input:
        sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
        participant_info = "data/external/participant_files/{pid}.yaml"
    params:
        sensor = "{sensor}",
    output:
        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
    script:
        "../src/data/unify_ios_android.R"

# Processes raw phone locations with the PHONE_LOCATIONS settings. The
# phone_sensed_timestamps input is resolved by
# optional_phone_yield_input_for_locations (defined outside this section).
rule process_phone_locations_types:
    input:
        locations = "data/raw/{pid}/phone_locations_raw.csv",
        phone_sensed_timestamps = optional_phone_yield_input_for_locations,
    params:
        consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
        time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
        locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"],
        accuracy_limit = config["PHONE_LOCATIONS"]["ACCURACY_LIMIT"]
    output:
        "data/interim/{pid}/phone_locations_processed.csv"
    script:
        "../src/data/process_location_types.R"

# Runs readable_datetime.R on the processed locations file (device_type "phone").
rule phone_locations_processed_with_datetime:
    input:
        sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "phone",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

# Runs resample_episodes.R on a sensor's episodes file.
rule resample_episodes:
    input:
        "data/interim/{pid}/{sensor}_episodes.csv"
    output:
        "data/interim/{pid}/{sensor}_episodes_resampled.csv"
    script:
        "../src/features/utils/resample_episodes.R"

# Runs readable_datetime.R on resampled episodes. device_type is the part of
# the sensor wildcard that precedes its first underscore.
rule resample_episodes_with_datetime:
    input:
        sensor_input = "data/interim/{pid}/{sensor}_episodes_resampled.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = lambda wildcards: wildcards.sensor.split("_")[0],
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/interim/{pid}/{sensor}_episodes_resampled_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

# Runs application_categories.R on a phone_applications_<type> file. All four
# params are read from the APPLICATION_CATEGORIES section of the
# PHONE_APPLICATIONS_<TYPE> config (type wildcard upper-cased).
rule phone_application_categories:
    input:
        "data/raw/{pid}/phone_applications_{type}_with_datetime.csv"
    params:
        catalogue_source = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"],
        catalogue_file = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"],
        update_catalogue_file = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"],
        scrape_missing_genres = lambda wildcards: config["PHONE_APPLICATIONS_" + str(wildcards.type).upper()]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"]
    output:
        "data/raw/{pid}/phone_applications_{type}_with_datetime_with_categories.csv"
    script:
        "../src/data/application_categories.R"

# Pulls raw data for one wearable sensor; device_type is constrained to
# empatica or fitbit. Inputs are supplied by
# pull_wearable_data_input_with_mutation_scripts (defined outside this section).
# The data stream and CONTAINER are looked up under the upper-cased device type.
rule pull_wearable_data:
    input:
        unpack(pull_wearable_data_input_with_mutation_scripts)
    params:
        data_configuration = lambda wildcards: config[wildcards.device_type.upper() + "_DATA_STREAMS"][config[wildcards.device_type.upper() + "_DATA_STREAMS"]["USE"]],
        device_type = "{device_type}",
        sensor = "{device_type}" + "_" + "{sensor}",
        pid = "{pid}",
        tables = lambda wildcards: config[wildcards.device_type.upper() + "_" + str(wildcards.sensor).upper()]["CONTAINER"],
    wildcard_constraints:
        device_type = "(empatica|fitbit)"
    output:
        "data/raw/{pid}/{device_type}_{sensor}_raw.csv"
    script:
        "../src/data/streams/pull_wearable_data.R"

# Runs readable_datetime.R on a raw Fitbit sensor file (device_type "fitbit").
rule fitbit_readable_datetime:
    input:
        sensor_input = "data/raw/{pid}/fitbit_{sensor}_raw.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "fitbit",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/raw/{pid}/fitbit_{sensor}_with_datetime.csv"
    script:
        "../src/data/datetime/readable_datetime.R"

# Runs fitbit_steps_intraday_exclude_sleep.py on intraday steps, passing the
# FITBIT_STEPS_INTRADAY.EXCLUDE_SLEEP setting. The sleep_data input is resolved
# by optional_steps_sleep_input (defined outside this section).
rule fitbit_steps_intraday_exclude_sleep:
    input:
        sensor_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv",
        sleep_data = optional_steps_sleep_input
    params:
        exclude_sleep = config["FITBIT_STEPS_INTRADAY"]["EXCLUDE_SLEEP"]
    output:
        "data/interim/{pid}/fitbit_steps_intraday_with_datetime_exclude_sleep.csv"
    script:
        "../src/data/fitbit_steps_intraday_exclude_sleep.py"

# Runs readable_datetime.R on a raw Empatica sensor file (device_type
# "empatica"). This is the only datetime rule here that declares a memory
# resource (mem_mb = 50000).
rule empatica_readable_datetime:
    input:
        sensor_input = "data/raw/{pid}/empatica_{sensor}_raw.csv",
        time_segments = "data/interim/time_segments/{pid}_time_segments.csv",
        pid_file = "data/external/participant_files/{pid}.yaml",
        tzcodes_file = input_tzcodes_file,
    params:
        device_type = "empatica",
        timezone_parameters = config["TIMEZONE"],
        pid = "{pid}",
        time_segments_type = config["TIME_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
    output:
        "data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
    resources:
        mem_mb = 50000
    script:
        "../src/data/datetime/readable_datetime.R"

# Per-participant "extract" stage of process_user_event_related_segments.py:
# reads the raw phone ESM file and writes the participant's ERS file and
# stress-event targets file.
rule extract_event_information_from_esm:
    input:
        esm_raw_input = "data/raw/{pid}/phone_esm_raw.csv",
        pid_file = "data/external/participant_files/{pid}.yaml"
    params:
        stage = "extract",
        pid = "{pid}"
    output:
        "data/raw/ers/{pid}_ers.csv",
        "data/raw/ers/{pid}_stress_event_targets.csv"
    script:
        "../src/features/phone_esm/straw/process_user_event_related_segments.py"

# "merge" stage of the same script: takes the per-participant ERS and
# stress-event target files for every pid in config["PIDS"] and writes the two
# combined files under data/external.
rule merge_event_related_segments_files:
    input:
        ers_files = expand("data/raw/ers/{pid}_ers.csv", pid = config["PIDS"]),
        se_files = expand("data/raw/ers/{pid}_stress_event_targets.csv", pid = config["PIDS"])
    params:
        stage = "merge"
    output:
        "data/external/straw_events.csv",
        "data/external/stress_event_targets.csv"
    script:
        "../src/features/phone_esm/straw/process_user_event_related_segments.py"