rapids/rules/preprocessing.snakefile

68 lines
2.3 KiB
Plaintext
Raw Normal View History

2019-10-24 18:11:24 +02:00
# Build the raw CSV for one participant/sensor pair by running
# download_dataset.R.
#   input  : the participant file data/external/{pid}
#   params : group - GROUP entry of the DOWNLOAD_DATASET config section
#            table - the value of the {sensor} wildcard
#   output : data/raw/{pid}/{sensor}_raw.csv
# NOTE(review): how the script uses `group` and `table` is not visible here;
# presumably they select the database and table to pull from - confirm in
# src/data/download_dataset.R.
rule download_dataset:
    input:
        "data/external/{pid}"
    params:
        group = config["DOWNLOAD_DATASET"]["GROUP"],
        table = "{sensor}"
    output:
        "data/raw/{pid}/{sensor}_raw.csv"
    script:
        "../src/data/download_dataset.R"
# Derive {sensor}_with_datetime.csv from the raw CSV written by
# download_dataset, by running readable_datetime.R.
#   input  : sensor_input - the output of rule download_dataset
#   params : timezones      - always None in this rule
#            fixed_timezone - FIXED_TIMEZONE entry of the READABLE_DATETIME
#                             config section
#   output : data/raw/{pid}/{sensor}_with_datetime.csv
rule readable_datetime:
    input:
        sensor_input = rules.download_dataset.output
    params:
        timezones = None,
        fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"]
    output:
        "data/raw/{pid}/{sensor}_with_datetime.csv"
    script:
        "../src/data/readable_datetime.R"
# Produce phone_valid_sensed_days.csv for one participant by running
# phone_valid_sensed_days.R over that participant's *_with_datetime.csv files.
#   input  : all_sensors - one file per entry in config["SENSORS"]; {{pid}} is
#            escaped so it stays a wildcard while {sensor} is expanded
#   params : BIN_SIZE, MIN_VALID_HOURS and MIN_BINS_PER_HOUR from the
#            PHONE_VALID_SENSED_DAYS config section
#   output : data/interim/{pid}/phone_valid_sensed_days.csv
rule phone_valid_sensed_days:
    input:
        all_sensors = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["SENSORS"]
        )
    params:
        bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
        min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"],
        min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
    output:
        "data/interim/{pid}/phone_valid_sensed_days.csv"
    script:
        "../src/data/phone_valid_sensed_days.R"
# Produce phone_sensed_bins.csv for one participant by running
# phone_sensed_bins.R over that participant's *_with_datetime.csv files.
#   input  : all_sensors - one file per entry in config["SENSORS"]; {{pid}} is
#            escaped so it stays a wildcard while {sensor} is expanded
#   params : bin_size - read from the PHONE_VALID_SENSED_DAYS config section,
#            the same key that rule phone_valid_sensed_days reads
#   output : data/interim/{pid}/phone_sensed_bins.csv
rule phone_sensed_bins:
    input:
        all_sensors = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["SENSORS"]
        )
    params:
        bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
    output:
        "data/interim/{pid}/phone_sensed_bins.csv"
    script:
        "../src/data/phone_sensed_bins.R"
# Produce {sensor}_with_datetime_unified.csv for one participant/sensor pair
# by running unify_ios_android.R.
#   input  : sensor_data      - data/raw/{pid}/{sensor}_with_datetime.csv
#            participant_info - the participant file data/external/{pid}
#   params : sensor - the value of the {sensor} wildcard
#   output : data/raw/{pid}/{sensor}_with_datetime_unified.csv
# NOTE(review): the rule name suggests the script reconciles iOS and Android
# data formats; the actual transformation is not visible here - confirm in
# src/data/unify_ios_android.R.
rule unify_ios_android:
    input:
        sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
        participant_info = "data/external/{pid}"
    params:
        sensor = "{sensor}"
    output:
        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
    script:
        "../src/data/unify_ios_android.R"
# Produce locations_resampled.csv for one participant by running
# resample_fused_location.R.
#   input  : locations         - the raw locations CSV (locations_raw.csv, not
#                                the *_with_datetime variant)
#            phone_sensed_bins - the output of rule phone_sensed_bins
#   params : bin_size comes from the PHONE_VALID_SENSED_DAYS config section;
#            timezone, consecutive_threshold and time_since_valid_location
#            come from the RESAMPLE_FUSED_LOCATION section
#   output : data/raw/{pid}/locations_resampled.csv
rule resample_fused_location:
    input:
        locations = "data/raw/{pid}/locations_raw.csv",
        phone_sensed_bins = rules.phone_sensed_bins.output
    params:
        bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
        timezone = config["RESAMPLE_FUSED_LOCATION"]["TIMEZONE"],
        consecutive_threshold = config["RESAMPLE_FUSED_LOCATION"]["CONSECUTIVE_THRESHOLD"],
        time_since_valid_location = config["RESAMPLE_FUSED_LOCATION"]["TIME_SINCE_VALID_LOCATION"]
    output:
        "data/raw/{pid}/locations_resampled.csv"
    script:
        "../src/data/resample_fused_location.R"