Add phone sensed bins (sensor row count for every n min bin)
parent
dbe2e236a9
commit
15a9e33728
|
@ -11,6 +11,7 @@ rule all:
|
||||||
expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
|
expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
|
||||||
expand("data/processed/{pid}/google_activity_recognition_deltas.csv", pid=config["PIDS"]),
|
expand("data/processed/{pid}/google_activity_recognition_deltas.csv", pid=config["PIDS"]),
|
||||||
expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
|
expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
|
||||||
|
expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
|
||||||
expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
|
expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
sms_type = config["SMS"]["TYPES"],
|
sms_type = config["SMS"]["TYPES"],
|
||||||
|
|
|
@ -32,6 +32,16 @@ rule phone_valid_sensed_days:
|
||||||
script:
|
script:
|
||||||
"../src/data/phone_valid_sensed_days.R"
|
"../src/data/phone_valid_sensed_days.R"
|
||||||
|
|
||||||
|
# Runs src/data/phone_sensed_bins.R for one participant ({pid}).
# The script receives every configured sensor's "<sensor>_with_datetime.csv"
# for that participant plus the bin size (read from the
# PHONE_VALID_SENSED_DAYS section of the config) and writes
# data/interim/{pid}/phone_sensed_bins.csv.
rule phone_sensed_bins:
    input:
        # {{pid}} stays a wildcard; only {sensor} is expanded here.
        all_sensors = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["SENSORS"]
        ),
    params:
        bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
    output:
        "data/interim/{pid}/phone_sensed_bins.csv"
    script:
        "../src/data/phone_sensed_bins.R"
|
||||||
|
|
||||||
rule unify_ios_android:
|
rule unify_ios_android:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/{sensor}_with_datetime.csv",
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
source("packrat/init.R")
|
||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
library(tidyr)
|
||||||
|
|
||||||
|
# Build the "phone sensed bins" table for one participant.
#
# Inputs (provided by Snakemake):
#   all_sensors  paths of the per-sensor CSV files; each file must contain
#                the columns local_date, local_hour and local_minute.
#   bin_size     width of a bin in minutes (a single positive number).
# Output:
#   A CSV with one row per local_date and one column per "<hour>_<bin>"
#   pair (bin = first minute of the bin). Each cell holds the number of
#   distinct sensor files that have at least one row in that bin; bins
#   without any row are filled with 0.
all_sensors <- snakemake@input[["all_sensors"]]
bin_size <- snakemake@params[["bin_size"]]
output_file <- snakemake@output[[1]]

# Fail early with a clear message instead of an obscure seq() error below.
stopifnot(is.numeric(bin_size), length(bin_size) == 1, bin_size > 0)

# Every hour of the day and every bin start minute within an hour.
hours <- seq(0, 23, 1)
bins <- seq(0, (59 %/% bin_size) * bin_size, bin_size)

# Read one sensor file, keeping only its time columns and tagging each row
# with the file name so distinct sensors can be counted per bin.
read_sensor_times <- function(sensor) {
  read.csv(sensor, stringsAsFactors = FALSE) %>%
    select(local_date, local_hour, local_minute) %>%
    mutate(sensor = basename(sensor))
}

# Load all sensors and extract timestamps. Binding once at the end avoids
# re-copying the accumulated data frame on every iteration.
all_sensor_data <- do.call(rbind, lapply(all_sensors, read_sensor_times))

if (is.null(all_sensor_data) || nrow(all_sensor_data) == 0) {
  # No sensor rows at all: still emit the full set of hour/bin columns (in
  # the same hour-major order the pipeline below produces) so the output
  # schema does not depend on the data.
  bin_columns <- as.vector(t(outer(hours, bins, paste, sep = "_")))
  phone_sensed_bins <- data.frame(
    local_date = character(),
    stringsAsFactors = FALSE
  )
  phone_sensed_bins[bin_columns] <- rep(list(integer()), length(bin_columns))
} else {
  phone_sensed_bins <- all_sensor_data %>%
    # bin rows into bin_size-minute bins
    mutate(bin = (local_minute %/% bin_size) * bin_size) %>%
    group_by(local_date, local_hour, bin) %>%
    summarise(sensor_count = n_distinct(sensor)) %>%
    ungroup() %>%
    # add the hour/bin combinations that had no data, with a count of 0
    complete(nesting(local_date),
             local_hour = hours,
             bin = bins,
             fill = list(sensor_count = 0)) %>%
    pivot_wider(names_from = c(local_hour, bin), values_from = sensor_count)
}

# row.names = FALSE: the automatic row numbers carry no information and
# would otherwise show up as an extra unnamed first column in the CSV.
write.csv(phone_sensed_bins, output_file, row.names = FALSE)
|
||||||
|
|
Loading…
Reference in New Issue