Add phone yield sensor
parent
555811211d
commit
f02ca2624d
53
Snakefile
53
Snakefile
|
@ -12,20 +12,13 @@ files_to_compute = []
|
|||
if len(config["PIDS"]) == 0:
|
||||
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
||||
|
||||
if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"] or config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: # valid sensed bins is necessary for sensed days, so we add these files anyways if sensed days are requested
|
||||
if len(config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]) == 0:
|
||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one PHONE_SENSOR to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml")
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_timestamps.csv", pid=config["PIDS"]))
|
||||
|
||||
if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv",
|
||||
pid=config["PIDS"],
|
||||
min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
|
||||
min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys():
|
||||
if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"])))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_data_yield_features/phone_data_yield_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]))
|
||||
|
||||
for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
|
||||
if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
@ -73,10 +66,10 @@ for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
|
|||
|
||||
for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
|
||||
if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if "PHONE_SCREEN" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
||||
if "PHONE_SCREEN" in config["PHONE_DATA_YIELD"]["SENSORS"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||
|
@ -133,10 +126,10 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
|||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
|
||||
if "PHONE_LOCATIONS" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
||||
if "PHONE_LOCATIONS" in config["PHONE_DATA_YIELD"]["SENSORS"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
raise ValueError("Error: Add PHONE_LOCATIONS (and as many SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||
|
@ -200,22 +193,22 @@ for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||
|
||||
# visualization for data exploration
|
||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
||||
if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
# if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
|
||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
||||
if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
||||
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
||||
if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
||||
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
||||
# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
||||
# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
||||
# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
||||
|
||||
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
||||
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||
|
||||
|
||||
rule all:
|
||||
|
|
49
config.yaml
49
config.yaml
|
@ -27,8 +27,8 @@ CREATE_PARTICIPANT_FILES:
|
|||
|
||||
# See https://www.rapids.science/setup/configuration/#day-segments
|
||||
DAY_SEGMENTS: &day_segments
|
||||
TYPE: EVENT # FREQUENCY, PERIODIC, EVENT
|
||||
FILE: "data/external/daysegments_event.csv"
|
||||
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
||||
FILE: "data/external/daysegments_periodic.csv"
|
||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
|
||||
|
||||
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
|
||||
|
@ -54,27 +54,22 @@ DEVICE_DATA:
|
|||
############## PHONE ###########################################################
|
||||
################################################################################
|
||||
|
||||
PHONE_VALID_SENSED_BINS:
|
||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
||||
# Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||
# If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS tables are mandatory.
|
||||
# You can choose any of the keys shown below, just make sure its TABLE exists in your database!
|
||||
# PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
|
||||
# PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
|
||||
PHONE_SENSORS: []
|
||||
|
||||
PHONE_VALID_SENSED_DAYS:
|
||||
COMPUTE: False
|
||||
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)
|
||||
MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
|
||||
PHONE_DATA_YIELD:
|
||||
SENSORS: [PHONE_MESSAGES, PHONE_CALLS, PHONE_ACCELEROMETER]
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
|
||||
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, an hour is counted as valid when the ratio of its minutes with at least one row of sensor data is above this threshold
|
||||
SRC_LANGUAGE: "r"
|
||||
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
PHONE_MESSAGES:
|
||||
TABLE: messages
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: True
|
||||
COMPUTE: False
|
||||
MESSAGES_TYPES : [received, sent]
|
||||
FEATURES:
|
||||
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
||||
|
@ -329,8 +324,8 @@ FITBIT_CALORIES:
|
|||
HEATMAP_FEATURES_CORRELATIONS:
|
||||
PLOT: False
|
||||
MIN_ROWS_RATIO: 0.5
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
||||
CORR_THRESHOLD: 0.1
|
||||
|
@ -338,25 +333,25 @@ HEATMAP_FEATURES_CORRELATIONS:
|
|||
|
||||
HISTOGRAM_VALID_SENSED_HOURS:
|
||||
PLOT: False
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||
|
||||
HEATMAP_DAYS_BY_SENSORS:
|
||||
PLOT: False
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||
EXPECTED_NUM_OF_DAYS: -1
|
||||
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
|
||||
|
||||
HEATMAP_SENSED_BINS:
|
||||
PLOT: False
|
||||
BIN_SIZE: *bin_size
|
||||
BIN_SIZE: #*bin_size
|
||||
|
||||
OVERALL_COMPLIANCE_HEATMAP:
|
||||
PLOT: False
|
||||
ONLY_SHOW_VALID_DAYS: False
|
||||
EXPECTED_NUM_OF_DAYS: -1
|
||||
BIN_SIZE: *bin_size
|
||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
||||
BIN_SIZE: #*bin_size
|
||||
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||
|
||||
|
|
|
@ -6,6 +6,32 @@ rule join_features_from_providers:
|
|||
script:
|
||||
"../src/features/join_features_from_providers.R"
|
||||
|
||||
# Runs the Python feature entry script (src/features/entry.py) for one participant
# ({pid}) and one PHONE_DATA_YIELD provider ({provider_key}), reading the yielded
# timestamps with datetime and the participant's day segment labels, and writing
# phone_data_yield_python_{provider_key}.csv.
rule phone_data_yield_python_features:
|
||||
input:
|
||||
sensor_data = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
# Provider settings are looked up in config.yaml with the upper-cased provider_key wildcard
provider = lambda wildcards: config["PHONE_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "phone_data_yield"
|
||||
output:
|
||||
"data/interim/{pid}/phone_data_yield_features/phone_data_yield_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
# Runs the R feature entry script (src/features/entry.R) for one participant
# ({pid}) and one PHONE_DATA_YIELD provider ({provider_key}), reading the yielded
# timestamps with datetime and the participant's day segment labels, and writing
# phone_data_yield_r_{provider_key}.csv.
rule phone_data_yield_r_features:
|
||||
input:
|
||||
sensor_data = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
# Provider settings are looked up in config.yaml with the upper-cased provider_key wildcard
provider = lambda wildcards: config["PHONE_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "phone_data_yield"
|
||||
output:
|
||||
"data/interim/{pid}/phone_data_yield_features/phone_data_yield_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule phone_accelerometer_python_features:
|
||||
input:
|
||||
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
||||
|
|
|
@ -77,23 +77,27 @@ rule phone_readable_datetime:
|
|||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule phone_sensed_bins:
|
||||
rule phone_yielded_timestamps:
|
||||
input:
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"]))
|
||||
output:
|
||||
"data/interim/{pid}/phone_sensed_bins.csv"
|
||||
"data/interim/{pid}/phone_yielded_timestamps.csv"
|
||||
script:
|
||||
"../src/data/phone_sensed_bins.R"
|
||||
"../src/data/phone_yielded_timestamps.R"
|
||||
|
||||
rule phone_sensed_timestamps:
|
||||
rule phone_yielded_timestamps_with_datetime:
|
||||
input:
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
||||
sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv",
|
||||
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||
params:
|
||||
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"],
|
||||
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_sensed_timestamps.csv"
|
||||
"data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv"
|
||||
script:
|
||||
"../src/data/phone_sensed_timestamps.R"
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule phone_valid_sensed_days:
|
||||
input:
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
library("dplyr", warn.conflicts = F)
|
||||
library(tidyr)
|
||||
library(readr)
|
||||
|
||||
#' Compute phone data yield ratios for every instance of a day segment
#'
#' Keeps the rows that belong to `day_segment`, bins each row into the minute
#' and the hour elapsed since the start of its segment, and summarises per
#' `local_segment` how many distinct minutes and how many valid hours contain
#' at least one row.
#'
#' @param data Data frame with a numeric `timestamp` column. The 60000 and
#'   3600000 divisors below treat timestamps as milliseconds.
#'   NOTE(review): `filter_data_by_segment()` is defined outside this file and
#'   is presumed to add `local_segment` and `timestamps_segment`
#'   ("start,end") -- confirm against its implementation.
#' @param feature_name Not used by this function; kept so existing callers
#'   keep working.
#' @param day_segment Day segment forwarded to `filter_data_by_segment()`.
#' @param provider Provider configuration list. Only
#'   `MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS` is read.
#' @return A data frame with one row per `local_segment` and the columns
#'   `valid_yielded_minutes`, `valid_yielded_hours`, `duration_minutes`,
#'   `duration_hours`, `phone_data_yield_rapids_ratiovalidyieldedminutes` and
#'   `phone_data_yield_rapids_ratiovalidyieldedhours`.
compute_data_yield_features <- function(data, feature_name, day_segment, provider) {
  binned_rows <- data %>%
    filter_data_by_segment(day_segment) %>%
    # timestamps_segment holds "start,end"; convert = TRUE parses both as numbers
    separate(timestamps_segment,
             into = c("start_timestamp", "end_timestamp"),
             convert = TRUE,
             sep = ",") %>%
    mutate(duration_minutes = (end_timestamp - start_timestamp) / 60000,
           timestamp_since_segment_start = timestamp - start_timestamp,
           minute_bin = timestamp_since_segment_start %/% 60000, # 60 * 1000
           hour_bin = timestamp_since_segment_start %/% 3600000) # 60 * 60 * 1000

  # One row per hour of each segment: an hour is valid when the share of its
  # minutes that contain data is above the provider threshold. The denominator
  # is capped at 60 so segments shorter than an hour are judged against their
  # own length.
  hourly_yield <- binned_rows %>%
    group_by(local_segment, hour_bin) %>%
    summarise(minute_count = n_distinct(minute_bin),
              duration_minutes = first(duration_minutes),
              valid_hour = (minute_count / min(duration_minutes, 60)) >
                provider$MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS)

  hourly_yield %>%
    group_by(local_segment) %>%
    summarise(valid_yielded_minutes = sum(minute_count),
              # as.numeric() keeps both if_else() branches below of type double
              valid_yielded_hours = as.numeric(sum(valid_hour)),
              duration_minutes = first(duration_minutes),
              duration_hours = duration_minutes / 60.0,
              phone_data_yield_rapids_ratiovalidyieldedminutes =
                valid_yielded_minutes / duration_minutes,
              # Segments of one hour or less report the 0/1 valid-hour count as the ratio
              phone_data_yield_rapids_ratiovalidyieldedhours =
                if_else(duration_hours > 1,
                        valid_yielded_hours / duration_hours,
                        valid_yielded_hours))
}
|
||||
|
||||
|
||||
|
||||
#' RAPIDS provider entry point for phone data yield features
#'
#' Reads the yielded timestamps file, computes the data yield features for
#' `day_segment` and keeps only the features requested in the provider
#' configuration that this provider can compute.
#'
#' @param sensor_data_files Named list; `sensor_data_files[["sensor_data"]]` is
#'   the path of a CSV with at least `timestamp` and `assigned_segments`
#'   columns (only those two columns are read).
#' @param day_segment Day segment forwarded to `compute_data_yield_features()`.
#' @param provider Provider configuration list; `FEATURES` lists the requested
#'   feature names.
#' @return A data frame with `local_segment` plus one
#'   `phone_data_yield_rapids_<feature>` column per computed feature.
rapids_features <- function(sensor_data_files, day_segment, provider) {
  yield_data <- read_csv(sensor_data_files[["sensor_data"]],
                         col_types = cols_only(timestamp = "d", assigned_segments = "c"))
  requested_features <- provider[["FEATURES"]]

  # The name of the features this function can compute
  base_features_names <- c("ratiovalidyieldedminutes", "ratiovalidyieldedhours")

  # The subset of requested features this function can compute
  features_to_compute <- intersect(base_features_names, requested_features)

  # The second argument used to be an undefined `feature_name` variable that
  # only worked because the callee never evaluates it; pass a defined value.
  compute_data_yield_features(yield_data, features_to_compute, day_segment, provider) %>%
    select(c("local_segment", paste0("phone_data_yield_rapids_", features_to_compute)))
}
|
Loading…
Reference in New Issue