- Fix bug when no phone data yield is needed to process location data

- Remove location rows with the same timestamp based on their accuracy
feature/aware_sql_split
JulioV 2021-05-26 14:04:29 -04:00
parent 9b21196f35
commit 32472461ec
4 changed files with 15 additions and 3 deletions

View File

@ -4,6 +4,8 @@
- Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable
- New keyboard features
- Add the `EXCLUDE_SLEEP` module for steps intraday features
- Fix bug that required phone data yield even when it is not needed to process location data
- Keep only the most accurate location row when two or more rows share the same timestamp
## v1.2.0
- Sleep summary and intraday features are more consistent.
- Add wake and bedtime features for sleep summary data.

View File

@ -11,6 +11,11 @@ def get_script_language(script_path):
# Features.smk #########################################################################################################
def optional_phone_yield_input_for_locations(wildcards):
    """Return the phone yield timestamps input only when locations are resampled.

    Intended to be used as a Snakemake input function (it is assigned to the
    `phone_sensed_timestamps` input of `process_phone_locations_types`).

    Args:
        wildcards: Wildcards object passed in by Snakemake; not read here.

    Returns:
        The path template "data/interim/{pid}/phone_yielded_timestamps.csv"
        when config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] is "ALL_RESAMPLED"
        or "FUSED_RESAMPLED"; otherwise an empty list (no extra input).
    """
    # `config` is the workflow-level configuration dict defined outside this function.
    locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
    needs_phone_yield = locations_to_use in ("ALL_RESAMPLED", "FUSED_RESAMPLED")
    # The "{pid}" placeholder is returned as-is; this function does not expand it.
    # NOTE(review): presumably Snakemake fills it from the rule's wildcards - confirm.
    return "data/interim/{pid}/phone_yielded_timestamps.csv" if needs_phone_yield else []
def get_barnett_daily(wildcards):
if wildcards.provider_key.upper() == "BARNETT":
return "data/interim/{pid}/phone_locations_barnett_daily.csv"

View File

@ -94,7 +94,7 @@ rule unify_ios_android:
rule process_phone_locations_types:
input:
locations = "data/raw/{pid}/phone_locations_raw.csv",
phone_sensed_timestamps = "data/interim/{pid}/phone_yielded_timestamps.csv",
phone_sensed_timestamps = optional_phone_yield_input_for_locations,
params:
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],

View File

@ -7,10 +7,13 @@ consecutive_threshold <- snakemake@params[["consecutive_threshold"]]
time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
locations_to_use <- snakemake@params[["locations_to_use"]]
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
locations <- read.csv(snakemake@input[["locations"]]) %>%
filter(double_latitude != 0 & double_longitude != 0) %>%
drop_na(double_longitude, double_latitude)
drop_na(double_longitude, double_latitude) %>%
group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp
filter(accuracy == min(accuracy, na.rm=TRUE)) %>%
filter(row_number()==1) %>%
ungroup()
if(!locations_to_use %in% c("ALL", "FUSED_RESAMPLED", "GPS", "ALL_RESAMPLED")){
print("Unkown location filter, provide one of the following three: ALL, GPS, ALL_RESAMPLED, or FUSED_RESAMPLED")
@ -39,6 +42,8 @@ if(locations_to_use == "ALL"){
}
if(nrow(locations) > 0){
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
processed_locations <- locations %>%
distinct(timestamp, .keep_all = TRUE) %>%
bind_rows(phone_sensed_timestamps) %>%