Change the way we compute battery episodes
parent
f806cb44ac
commit
a851a460e4
|
@ -54,13 +54,13 @@ rule calls_r_features:
|
|||
script:
|
||||
"../src/features/calls/calls_entry.R"
|
||||
|
||||
rule battery_deltas:
|
||||
rule battery_episodes:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["BATTERY"]["DB_TABLE"])
|
||||
expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
|
||||
output:
|
||||
"data/processed/{pid}/battery_deltas.csv"
|
||||
"data/interim/{pid}/battery_episodes.csv"
|
||||
script:
|
||||
"../src/features/battery_deltas.R"
|
||||
"../src/features/battery/episodes/battery_episodes.R"
|
||||
|
||||
rule screen_episodes:
|
||||
input:
|
||||
|
@ -172,7 +172,7 @@ rule activity_features:
|
|||
|
||||
rule battery_features:
|
||||
input:
|
||||
"data/processed/{pid}/battery_deltas.csv"
|
||||
"data/interim/{pid}/battery_episodes.csv"
|
||||
params:
|
||||
day_segment = "{day_segment}",
|
||||
features = config["BATTERY"]["FEATURES"]
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
source("renv/activate.R")
library("dplyr")

# Turns raw battery rows into one "episode" per retained row and writes them
# to the rule's first output as CSV.
#
# Input : snakemake@input[[1]]  - CSV read with read.csv(); the code below
#         uses the columns `timestamp`, `battery_status` and `battery_level`.
# Output: snakemake@output[[1]] - CSV with the columns episode_id,
#         start_timestamp, end_timestamp, battery_level.

battery <- read.csv(snakemake@input[[1]])

if (nrow(battery) > 0) {
  # TODO expose this in the config file
  threshold_between_rows <- 30

  battery_episodes <- battery %>%
    filter(battery_status >= 2) %>% # discard unknown states
    mutate(
      start_timestamp = timestamp,
      # An episode ends one timestamp unit before the next retained row
      # starts; the last row has no successor, so its end_timestamp is NA.
      end_timestamp = lead(start_timestamp) - 1,
      # Episode length, capped at threshold_between_rows.
      # NOTE(review): presumably timestamps are in milliseconds, making this
      # minutes - confirm. time_diff is dropped by the select() below, so the
      # cap currently has no effect on the written file.
      time_diff = (end_timestamp - start_timestamp) / 1000 / 60,
      time_diff = if_else(time_diff > threshold_between_rows,
                          threshold_between_rows,
                          time_diff),
      # row_number() yields integer(0) when the filter removed every row;
      # `1:n()` would yield c(1, 0) there and make mutate() fail.
      episode_id = row_number()
    ) %>%
    select(episode_id, start_timestamp, end_timestamp, battery_level)
} else {
  # Empty input: emit a header-only file with the same columns, typed like
  # the columns produced by the non-empty branch.
  battery_episodes <- data.frame(
    episode_id = integer(),
    start_timestamp = numeric(),
    end_timestamp = numeric(),
    battery_level = numeric()
  )
}

write.csv(battery_episodes, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,34 +0,0 @@
|
|||
source("renv/activate.R")

library("tidyverse")

# Summarises runs of consecutive rows that share a battery_status (only
# status 2 or 3 is kept) into one row per run: battery level change, duration
# and the local date/time/day-segment at both ends of the run. The result is
# written to the rule's first output as CSV.

battery <- read.csv(snakemake@input[[1]])

if (nrow(battery) == 0) {
  # No data: write a header-only file with the expected columns.
  consumption <- data.frame(
    battery_diff = numeric(),
    time_diff = numeric(),
    local_start_date_time = character(),
    local_end_date_time = character(),
    local_start_date = character(),
    local_end_date = character(),
    local_start_day_segment = character(),
    local_end_day_segment = character()
  )
} else {
  # Flag each row whose status differs from the previous row (the first row
  # has no predecessor and counts as "no change"), then number the runs.
  status_runs <- battery %>%
    mutate(
      status_changed = coalesce(
        ifelse(lag(battery_status) != battery_status, 1, 0),
        0
      ),
      run_id = cumsum(status_changed) + 1
    )

  consumption <- status_runs %>%
    filter(battery_status %in% c(2, 3)) %>%
    group_by(run_id) %>%
    summarise(
      battery_diff = first(battery_level) - last(battery_level),
      time_diff = (last(timestamp) - first(timestamp)) / (1000 * 60),
      local_start_date_time = first(local_date_time),
      local_end_date_time = last(local_date_time),
      local_start_date = first(local_date),
      local_end_date = last(local_date),
      local_start_day_segment = first(local_day_segment),
      local_end_day_segment = last(local_day_segment)
    ) %>%
    select(-run_id) %>%
    filter(time_diff > 6) # Avoids including quick cycles
}

write.csv(consumption, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
import itertools
|
||||
from screen.screen_base import base_screen_features
|
||||
|
||||
# Entry script: reads the screen episodes and the phone-sensed bins named in
# the Snakemake inputs, computes the screen features through
# base_screen_features, checks the column count and writes the result as CSV.

date_columns = ["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]
screen_data = pd.read_csv(snakemake.input["screen_deltas"], parse_dates=date_columns)

phone_sensed_bins = pd.read_csv(
    snakemake.input["phone_sensed_bins"],
    parse_dates=["local_date"],
    index_col="local_date",
)
# Collapse every positive bin value to 1.
phone_sensed_bins[phone_sensed_bins > 0] = 1

day_segment = snakemake.params["day_segment"]
screen_features = pd.DataFrame(columns=["local_date"])

# "firstuseafter" gets the zero-padded reference hour appended; every other
# requested feature name is used unchanged.
requested_features_deltas = []
for feature_name in snakemake.params["features_deltas"]:
    if feature_name == "firstuseafter":
        feature_name = "firstuseafter" + "{0:0=2d}".format(snakemake.params["reference_hour_first_use"])
    requested_features_deltas.append(feature_name)

# One expected feature per (feature name, episode type) pair.
requested_features = list(map("".join, itertools.product(requested_features_deltas, snakemake.params["episode_types"])))

computed_features = base_screen_features(screen_data, phone_sensed_bins, day_segment, snakemake.params)
screen_features = screen_features.merge(computed_features, on="local_date", how="outer")

# The output must hold one column per requested feature plus "local_date".
assert len(requested_features) + 1 == screen_features.shape[1], (
    "The number of features in the output dataframe (=" + str(screen_features.shape[1])
    + ") does not match the expected value (=" + str(len(requested_features))
    + " + 1). Verify your screen feature extraction functions"
)

screen_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue