- Fix bug when no phone data yield is needed to process location data
- Remove location rows with the same timestamp based on their accuracy
feature/aware_sql_split
parent
9b21196f35
commit
32472461ec
|
@ -4,6 +4,8 @@
|
|||
- Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable
|
||||
- New keyboard features
|
||||
- Add the `EXCLUDE_SLEEP` module for steps intraday features
|
||||
- Fix bug when no phone data yield is needed to process location data
|
||||
- Remove location rows with the same timestamp based on their accuracy
|
||||
## v1.2.0
|
||||
- Sleep summary and intraday features are more consistent.
|
||||
- Add wake and bedtime features for sleep summary data.
|
||||
|
|
|
@ -11,6 +11,11 @@ def get_script_language(script_path):
|
|||
|
||||
|
||||
# Features.smk #########################################################################################################
|
||||
# Snakemake input function: decides whether the phone yielded timestamps file
# is required as an input, based on config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"].
# The `wildcards` argument is accepted but not read in this body.
def optional_phone_yield_input_for_locations(wildcards):
|
||||
# Only the two resampled modes request the yielded timestamps file.
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] in ["ALL_RESAMPLED","FUSED_RESAMPLED"]:
|
||||
# The path is returned with its `{pid}` placeholder intact.
# NOTE(review): presumably Snakemake fills `{pid}` from the rule's wildcards; confirm.
return "data/interim/{pid}/phone_yielded_timestamps.csv"
|
||||
# Any other LOCATIONS_TO_USE value: return an empty list, i.e. no extra input file.
return []
|
||||
|
||||
def get_barnett_daily(wildcards):
|
||||
if wildcards.provider_key.upper() == "BARNETT":
|
||||
return "data/interim/{pid}/phone_locations_barnett_daily.csv"
|
||||
|
|
|
@ -94,7 +94,7 @@ rule unify_ios_android:
|
|||
rule process_phone_locations_types:
|
||||
input:
|
||||
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
||||
phone_sensed_timestamps = "data/interim/{pid}/phone_yielded_timestamps.csv",
|
||||
phone_sensed_timestamps = optional_phone_yield_input_for_locations,
|
||||
params:
|
||||
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
||||
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
||||
|
|
|
@ -7,10 +7,13 @@ consecutive_threshold <- snakemake@params[["consecutive_threshold"]]
|
|||
time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
|
||||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||
|
||||
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
|
||||
locations <- read.csv(snakemake@input[["locations"]]) %>%
|
||||
filter(double_latitude != 0 & double_longitude != 0) %>%
|
||||
drop_na(double_longitude, double_latitude)
|
||||
drop_na(double_longitude, double_latitude) %>%
|
||||
group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp
|
||||
filter(accuracy == min(accuracy, na.rm=TRUE)) %>%
|
||||
filter(row_number()==1) %>%
|
||||
ungroup()
|
||||
|
||||
if(!locations_to_use %in% c("ALL", "FUSED_RESAMPLED", "GPS", "ALL_RESAMPLED")){
|
||||
print("Unkown location filter, provide one of the following three: ALL, GPS, ALL_RESAMPLED, or FUSED_RESAMPLED")
|
||||
|
@ -39,6 +42,8 @@ if(locations_to_use == "ALL"){
|
|||
}
|
||||
|
||||
if(nrow(locations) > 0){
|
||||
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
|
||||
|
||||
processed_locations <- locations %>%
|
||||
distinct(timestamp, .keep_all = TRUE) %>%
|
||||
bind_rows(phone_sensed_timestamps) %>%
|
||||
|
|
Loading…
Reference in New Issue