Add valid sensed days
parent
6eb7bc9e70
commit
cca1633728
|
@ -8,6 +8,7 @@ rule all:
|
||||||
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
|
||||||
expand("data/processed/{pid}/battery_deltas.csv", pid=config["PIDS"]),
|
expand("data/processed/{pid}/battery_deltas.csv", pid=config["PIDS"]),
|
||||||
|
expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
|
||||||
expand("data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv",
|
expand("data/processed/{pid}/com_sms_{sms_type}_{day_segment}_{metric}.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
sms_type = config["COM_SMS"]["SMS_TYPES"],
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Valid database table names
|
# Valid database table names
|
||||||
SENSORS: [messages, calls, battery, screen]
|
SENSORS: [applications_crashes, applications_foreground, applications_notifications, battery, bluetooth, calls, locations, messages, plugin_ambient_noise, plugin_device_usage, plugin_google_activity_recognition, screen]
|
||||||
|
|
||||||
# Participants to include in the analysis
|
# Participants to include in the analysis
|
||||||
# You must create a file for each participant
|
# You must create a file for each participant
|
||||||
|
@ -31,4 +31,9 @@ COM_CALL:
|
||||||
CALL_TYPE_TAKEN : [incoming, outgoing]
|
CALL_TYPE_TAKEN : [incoming, outgoing]
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS_MISSED: [count, distinctcontacts]
|
METRICS_MISSED: [count, distinctcontacts]
|
||||||
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]
|
METRICS_TAKEN: [count, distinctcontacts, meanduration, sumduration, hubermduration, varqnduration, entropyduration]
|
||||||
|
|
||||||
|
PHONE_VALID_SENSED_DAYS:
|
||||||
|
BIN_SIZE: 5 # (in minutes)
|
||||||
|
MIN_VALID_HOURS: 20 # (out of 24)
|
||||||
|
MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins)
|
|
@ -18,4 +18,16 @@ rule readable_datetime:
|
||||||
output:
|
output:
|
||||||
"data/raw/{pid}/{sensor}_with_datetime.csv"
|
"data/raw/{pid}/{sensor}_with_datetime.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/readable_datetime.R"
|
"../src/data/readable_datetime.R"
|
||||||
|
|
||||||
|
# Computes, for one participant ({pid}), the days on which the phone logged
# enough data to be considered validly sensed. The thresholds come from the
# PHONE_VALID_SENSED_DAYS section of the config and the computation itself is
# delegated to src/data/phone_valid_sensed_days.R.
rule phone_valid_sensed_days:
|
||||||
|
input:
|
||||||
|
# One "{sensor}_with_datetime.csv" per entry in config["SENSORS"]; the doubled
# braces keep {pid} as a rule wildcard while expand() fills in {sensor}.
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"])
|
||||||
|
params:
|
||||||
|
# Thresholds forwarded to the script (read there via snakemake@params).
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
|
||||||
|
min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"],
|
||||||
|
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_valid_sensed_days.csv"
|
||||||
|
script:
|
||||||
|
"../src/data/phone_valid_sensed_days.R"
|
|
@ -0,0 +1,31 @@
|
||||||
|
source("packrat/init.R")
|
||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
# Values injected by Snakemake (rule phone_valid_sensed_days).
all_sensors <- snakemake@input[["all_sensors"]] # paths of the {sensor}_with_datetime.csv files
bin_size <- snakemake@params[["bin_size"]] # bin width, in minutes
min_valid_hours <- snakemake@params[["min_valid_hours"]] # hours a day needs to be kept
min_bins_per_hour <- snakemake@params[["min_bins_per_hour"]] # bins an hour needs to be kept
output_file <- snakemake@output[[1]]

# Load every sensor file, keep only the local date/hour/minute of each row,
# and stack them once at the end instead of growing a data frame inside a loop.
sensor_times <- lapply(all_sensors, function(sensor) {
  read.csv(sensor, stringsAsFactors = FALSE) %>%
    select(local_date, local_hour, local_minute)
})
all_sensor_data <- do.call(rbind, sensor_times)

phone_valid_sensed_days <- all_sensor_data %>%
  # Assign each row to the bin_size-minute bin its minute falls in.
  mutate(bin = (local_minute %/% bin_size) * bin_size) %>%
  # Several rows logged within the same bin count as one sensed bin.
  distinct(local_date, local_hour, bin) %>%
  # Count how many sensed bins each hour has.
  group_by(local_date, local_hour) %>%
  summarise(bins = n()) %>%
  ungroup() %>%
  # Discard hours with fewer than min_bins_per_hour sensed bins.
  filter(bins >= min_bins_per_hour) %>%
  # Count how many valid hours each day has.
  group_by(local_date) %>%
  summarise(valid_hours = n()) %>%
  # Discard days with fewer than min_valid_hours valid hours.
  filter(valid_hours >= min_valid_hours)

# row.names = FALSE: without it write.csv prepends an unnamed index column
# that is not part of the data (local_date, valid_hours).
write.csv(phone_valid_sensed_days, output_file, row.names = FALSE)
|
Loading…
Reference in New Issue