2020-08-03 19:09:16 +02:00
|
|
|
# Runs src/data/restore_sql_file.py against the bundled example SQL dump.
# The only declared output is a touch() flag file, so Snakemake tracks
# completion through "data/interim/restore_sql_file.done" rather than through
# any data file written by the script.
rule restore_sql_file:
    input:
        sql_file="data/external/rapids_example.sql",
        # Credentials file handed to the script as a regular input.
        db_credentials=".env"
    output:
        touch("data/interim/restore_sql_file.done")
    params:
        # NOTE(review): the group is read from the DOWNLOAD_PARTICIPANTS
        # section of the config -- presumably the same credentials group used
        # when downloading participants; confirm against the script.
        group=config["DOWNLOAD_PARTICIPANTS"]["GROUP"]
    script:
        "../src/data/restore_sql_file.py"
|
|
|
|
|
2020-08-03 23:30:15 +02:00
|
|
|
# Writes the two example participant files data/external/example01 and
# data/external/example02 with a single shell command.
#
# The "\n" sequences are ordinary Python escapes, so the shell receives real
# newlines inside the single-quoted echo argument. Each file therefore gets
# four text lines followed by an empty one (echo adds its own newline).
# NOTE(review): the four lines look like device id, platform, label and a
# start/end date pair -- confirm against whatever reads these files.
rule create_example_participant_files:
    output:
        expand("data/external/{pid}", pid=["example01", "example02"])
    shell:
        # Adjacent literals are concatenated into one command string.
        "echo 'a748ee1a-1d0b-4ae9-9074-279a2b6ba524\nandroid\ntest01\n2020/04/23,2020/05/04\n' >> ./data/external/example01"
        " && "
        "echo '13dbc8a3-dae3-4834-823a-4bc96a7d459d\nios\ntest02\n2020/04/23,2020/05/04\n' >> ./data/external/example02"
|
|
|
|
|
2020-02-10 22:45:34 +01:00
|
|
|
# Runs src/data/download_participants.R. The rule declares no input or output
# files; everything the script needs is passed through params.
rule download_participants:
    # Raised above Snakemake's default priority of 0 so this job is scheduled
    # ahead of default-priority jobs when both are ready.
    priority: 1
    params:
        group=config["DOWNLOAD_PARTICIPANTS"]["GROUP"],
        ignored_device_ids=config["DOWNLOAD_PARTICIPANTS"]["IGNORED_DEVICE_IDS"],
        timezone=config["TIMEZONE"]
    script:
        "../src/data/download_participants.R"
|
|
|
|
|
2019-10-24 18:11:24 +02:00
|
|
|
# Produces data/raw/{pid}/{sensor}_raw.csv from the participant file
# data/external/{pid} by running src/data/download_dataset.R.
# The {sensor} wildcard is forwarded to the script as the `table` param.
rule download_dataset:
    input:
        "data/external/{pid}"
    output:
        "data/raw/{pid}/{sensor}_raw.csv"
    params:
        group=config["DOWNLOAD_DATASET"]["GROUP"],
        table="{sensor}",
        timezone=config["TIMEZONE"],
        # Comma-separated string of the Android and iOS table names for
        # activity recognition and conversation, in that order.
        aware_multiplatform_tables=",".join([
            config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"],
            config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"],
            config["CONVERSATION"]["DB_TABLE"]["ANDROID"],
            config["CONVERSATION"]["DB_TABLE"]["IOS"],
        ]),
        # Maps a fixed sensor label to the table name configured for it.
        unifiable_sensors={
            "calls": config["CALLS"]["DB_TABLE"],
            "battery": config["BATTERY"]["DB_TABLE"],
            "screen": config["SCREEN"]["DB_TABLE"],
            "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"],
            "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"],
        }
    script:
        "../src/data/download_dataset.R"
|
|
|
|
|
2020-07-23 03:54:19 +02:00
|
|
|
# Runs src/data/compute_day_segments.py on the file named by
# config["DAY_SEGMENTS"]["FILE"] and declares two CSV outputs under
# data/interim: the segments themselves and their labels.
rule compute_day_segments:
    input:
        config["DAY_SEGMENTS"]["FILE"]
    output:
        segments_file="data/interim/day_segments.csv",
        segments_labels_file="data/interim/day_segments_labels.csv"
    params:
        day_segments_type=config["DAY_SEGMENTS"]["TYPE"]
    script:
        "../src/data/compute_day_segments.py"
|
|
|
|
|
2020-06-23 17:33:34 +02:00
|
|
|
# Table names of every smartphone sensor. readable_datetime uses this list to
# build the wildcard constraint that restricts its {sensor} wildcard.
PHONE_SENSORS = [
    config["MESSAGES"]["DB_TABLE"],
    config["CALLS"]["DB_TABLE"],
    config["LOCATIONS"]["DB_TABLE"],
    config["BLUETOOTH"]["DB_TABLE"],
    config["BATTERY"]["DB_TABLE"],
    config["SCREEN"]["DB_TABLE"],
    config["LIGHT"]["DB_TABLE"],
    config["ACCELEROMETER"]["DB_TABLE"],
    config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],
    config["CONVERSATION"]["DB_TABLE"]["ANDROID"],
    config["CONVERSATION"]["DB_TABLE"]["IOS"],
    config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"],
    config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"],
]
PHONE_SENSORS.extend(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"])

# The two WIFI tables are optional. A truthiness test is used instead of
# len(...) > 0 so that a key left blank in the YAML config (parsed as None)
# is treated like an empty string instead of raising TypeError.
if config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]:
    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])
if config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]:
    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])
|
2020-07-24 22:35:41 +02:00
|
|
|
|
2020-06-23 17:33:34 +02:00
|
|
|
|
2019-10-24 22:08:05 +02:00
|
|
|
# Turns data/raw/{pid}/{sensor}_raw.csv into
# data/raw/{pid}/{sensor}_with_datetime.csv by running
# src/data/readable_datetime.R, which also receives the day segments file.
rule readable_datetime:
    input:
        sensor_input="data/raw/{pid}/{sensor}_raw.csv",
        day_segments="data/interim/day_segments.csv"
    output:
        "data/raw/{pid}/{sensor}_with_datetime.csv"
    wildcard_constraints:
        # Only process smartphone sensors, not fitbit: {sensor} must contain
        # one of the (regex-escaped) PHONE_SENSORS table names.
        sensor=".*(" + "|".join(map(re.escape, PHONE_SENSORS)) + ").*"
    params:
        timezones=None,
        fixed_timezone=config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        day_segments_type=config["DAY_SEGMENTS"]["TYPE"]
    script:
        "../src/data/readable_datetime.R"
|
|
|
|
|
2020-07-09 19:01:50 +02:00
|
|
|
# Produces data/interim/{pid}/phone_sensed_bins.csv by running
# src/data/phone_sensed_bins.R. The input files come from
# optional_phone_sensed_bins_input, which is defined outside this section.
rule phone_sensed_bins:
    input:
        all_sensors=optional_phone_sensed_bins_input
    output:
        "data/interim/{pid}/phone_sensed_bins.csv"
    params:
        bin_size=config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
    script:
        "../src/data/phone_sensed_bins.R"
|
2019-11-12 20:57:27 +01:00
|
|
|
|
2020-07-09 19:01:50 +02:00
|
|
|
# Runs src/data/phone_valid_sensed_days.R on a participant's sensed-bins file.
# Both thresholds are wildcards parsed from the requested output file name
# and are passed to the script unchanged (as strings) through params.
rule phone_valid_sensed_days:
    input:
        phone_sensed_bins="data/interim/{pid}/phone_sensed_bins.csv"
    output:
        "data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv"
    params:
        min_valid_hours_per_day="{min_valid_hours_per_day}",
        min_valid_bins_per_hour="{min_valid_bins_per_hour}"
    script:
        "../src/data/phone_valid_sensed_days.R"
|
|
|
|
|
2019-12-04 17:33:25 +01:00
|
|
|
|
2019-11-12 20:57:27 +01:00
|
|
|
# Produces data/raw/{pid}/{sensor}_with_datetime_unified.csv from the
# sensor's with_datetime CSV and the participant file, by running
# src/data/unify_ios_android.R.
rule unify_ios_android:
    input:
        sensor_data="data/raw/{pid}/{sensor}_with_datetime.csv",
        participant_info="data/external/{pid}"
    output:
        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
    params:
        sensor="{sensor}",
        # Maps a fixed sensor label to the table name configured for it.
        unifiable_sensors={
            "calls": config["CALLS"]["DB_TABLE"],
            "battery": config["BATTERY"]["DB_TABLE"],
            "screen": config["SCREEN"]["DB_TABLE"],
            "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"],
            "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"],
        }
    script:
        "../src/data/unify_ios_android.R"
|
|
|
|
|
2020-08-28 19:53:00 +02:00
|
|
|
# Produces data/raw/{pid}/{sensor}_processed_{locations_to_used}.csv by
# running src/data/process_location_types.R on the sensor's with_datetime
# CSV, the phone_sensed_bins rule's output and the day segments file.
rule process_location_types:
    input:
        locations="data/raw/{pid}/{sensor}_with_datetime.csv",
        phone_sensed_bins=rules.phone_sensed_bins.output,
        day_segments="data/interim/day_segments.csv"
    output:
        "data/raw/{pid}/{sensor}_processed_{locations_to_used}.csv"
    params:
        bin_size=config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
        timezone=config["LOCATIONS"]["TIMEZONE"],
        consecutive_threshold=config["LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
        time_since_valid_location=config["LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
        day_segments_type=config["DAY_SEGMENTS"]["TYPE"],
        # The param is named locations_to_use, while the wildcard it carries
        # is spelled locations_to_used (matching the output pattern).
        locations_to_use="{locations_to_used}"
    script:
        "../src/data/process_location_types.R"
|
2020-01-15 23:18:10 +01:00
|
|
|
|
2020-01-16 00:28:56 +01:00
|
|
|
# Produces data/interim/{pid}/{sensor}_with_datetime_with_genre.csv from the
# sensor's with_datetime CSV by running src/data/application_genres.R.
# All params are taken from the APPLICATION_GENRES section of the config.
rule application_genres:
    input:
        "data/raw/{pid}/{sensor}_with_datetime.csv"
    output:
        "data/interim/{pid}/{sensor}_with_datetime_with_genre.csv"
    params:
        catalogue_source=config["APPLICATION_GENRES"]["CATALOGUE_SOURCE"],
        catalogue_file=config["APPLICATION_GENRES"]["CATALOGUE_FILE"],
        update_catalogue_file=config["APPLICATION_GENRES"]["UPDATE_CATALOGUE_FILE"],
        scrape_missing_genres=config["APPLICATION_GENRES"]["SCRAPE_MISSING_GENRES"]
    script:
        "../src/data/application_genres.R"
|
|
|
|
|
2020-06-23 17:33:34 +02:00
|
|
|
# Runs src/data/fitbit_readable_datetime.py with fitbit_sensor="heartrate"
# on the raw CSV of the table named by config["HEARTRATE"]["DB_TABLE"],
# declaring a summary and an intraday output file.
rule fitbit_heartrate_with_datetime:
    input:
        # {{pid}} is escaped so expand() leaves it as a wildcard and only
        # substitutes fitbit_table.
        expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["DB_TABLE"])
    output:
        summary_data="data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
        intraday_data="data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
    params:
        local_timezone=config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        fitbit_sensor="heartrate"
    script:
        "../src/data/fitbit_readable_datetime.py"
|
2020-01-16 00:28:56 +01:00
|
|
|
|
2020-06-23 17:33:34 +02:00
|
|
|
# Runs src/data/fitbit_readable_datetime.py with fitbit_sensor="steps" on the
# raw CSV of the table named by config["STEP"]["DB_TABLE"]. Only an intraday
# output file is declared for this sensor.
rule fitbit_step_with_datetime:
    input:
        # {{pid}} is escaped so expand() leaves it as a wildcard and only
        # substitutes fitbit_table.
        expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["DB_TABLE"])
    output:
        intraday_data="data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
    params:
        local_timezone=config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        fitbit_sensor="steps"
    script:
        "../src/data/fitbit_readable_datetime.py"
|
|
|
|
|
|
|
|
# Runs src/data/fitbit_readable_datetime.py with fitbit_sensor="sleep" on the
# raw CSV of the table named by config["SLEEP"]["DB_TABLE"], declaring a
# summary and an intraday output file.
rule fitbit_sleep_with_datetime:
    input:
        # {{pid}} is escaped so expand() leaves it as a wildcard and only
        # substitutes fitbit_table.
        expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["DB_TABLE"])
    output:
        summary_data="data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
        intraday_data="data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
    params:
        local_timezone=config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        fitbit_sensor="sleep"
    script:
        "../src/data/fitbit_readable_datetime.py"
|
2020-09-01 00:51:06 +02:00
|
|
|
|
|
|
|
# Produces data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv by
# running src/data/join_visible_and_connected_wifi.R. The inputs come from
# optional_wifi_input (defined outside this section); unpack() expands the
# collection it returns into the rule's individual inputs.
rule join_wifi_tables:
    input:
        unpack(optional_wifi_input)
    output:
        "data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv"
    script:
        "../src/data/join_visible_and_connected_wifi.R"
|