- Fix bug when no phone data yield is needed to process location data
- Remove location rows with the same timestamp based on their accuracy
feature/aware_sql_split
parent
9b21196f35
commit
32472461ec
|
@ -4,6 +4,8 @@
|
||||||
- Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable
|
- Fix bug that did not correctly parse participants with more than 2 phones or more than 1 wearable
|
||||||
- New keyboard features
|
- New keyboard features
|
||||||
- Add the `EXCLUDE_SLEEP` module for steps intraday features
|
- Add the `EXCLUDE_SLEEP` module for steps intraday features
|
||||||
|
- Fix bug when no phone data yield is needed to process location data
|
||||||
|
- Remove location rows with the same timestamp based on their accuracy
|
||||||
## v1.2.0
|
## v1.2.0
|
||||||
- Sleep summary and intraday features are more consistent.
|
- Sleep summary and intraday features are more consistent.
|
||||||
- Add wake and bedtime features for sleep summary data.
|
- Add wake and bedtime features for sleep summary data.
|
||||||
|
|
|
@ -11,6 +11,11 @@ def get_script_language(script_path):
|
||||||
|
|
||||||
|
|
||||||
# Features.smk #########################################################################################################
|
# Features.smk #########################################################################################################
|
||||||
|
def optional_phone_yield_input_for_locations(wildcards):
|
||||||
|
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] in ["ALL_RESAMPLED","FUSED_RESAMPLED"]:
|
||||||
|
return "data/interim/{pid}/phone_yielded_timestamps.csv"
|
||||||
|
return []
|
||||||
|
|
||||||
def get_barnett_daily(wildcards):
|
def get_barnett_daily(wildcards):
|
||||||
if wildcards.provider_key.upper() == "BARNETT":
|
if wildcards.provider_key.upper() == "BARNETT":
|
||||||
return "data/interim/{pid}/phone_locations_barnett_daily.csv"
|
return "data/interim/{pid}/phone_locations_barnett_daily.csv"
|
||||||
|
|
|
@ -94,7 +94,7 @@ rule unify_ios_android:
|
||||||
rule process_phone_locations_types:
|
rule process_phone_locations_types:
|
||||||
input:
|
input:
|
||||||
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
locations = "data/raw/{pid}/phone_locations_raw.csv",
|
||||||
phone_sensed_timestamps = "data/interim/{pid}/phone_yielded_timestamps.csv",
|
phone_sensed_timestamps = optional_phone_yield_input_for_locations,
|
||||||
params:
|
params:
|
||||||
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
||||||
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
||||||
|
|
|
@ -7,10 +7,13 @@ consecutive_threshold <- snakemake@params[["consecutive_threshold"]]
|
||||||
time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
|
time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
|
||||||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||||
|
|
||||||
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
|
|
||||||
locations <- read.csv(snakemake@input[["locations"]]) %>%
|
locations <- read.csv(snakemake@input[["locations"]]) %>%
|
||||||
filter(double_latitude != 0 & double_longitude != 0) %>%
|
filter(double_latitude != 0 & double_longitude != 0) %>%
|
||||||
drop_na(double_longitude, double_latitude)
|
drop_na(double_longitude, double_latitude) %>%
|
||||||
|
group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp
|
||||||
|
filter(accuracy == min(accuracy, na.rm=TRUE)) %>%
|
||||||
|
filter(row_number()==1) %>%
|
||||||
|
ungroup()
|
||||||
|
|
||||||
if(!locations_to_use %in% c("ALL", "FUSED_RESAMPLED", "GPS", "ALL_RESAMPLED")){
|
if(!locations_to_use %in% c("ALL", "FUSED_RESAMPLED", "GPS", "ALL_RESAMPLED")){
|
||||||
print("Unkown location filter, provide one of the following three: ALL, GPS, ALL_RESAMPLED, or FUSED_RESAMPLED")
|
print("Unkown location filter, provide one of the following three: ALL, GPS, ALL_RESAMPLED, or FUSED_RESAMPLED")
|
||||||
|
@ -39,6 +42,8 @@ if(locations_to_use == "ALL"){
|
||||||
}
|
}
|
||||||
|
|
||||||
if(nrow(locations) > 0){
|
if(nrow(locations) > 0){
|
||||||
|
phone_sensed_timestamps <- read_csv(snakemake@input[["phone_sensed_timestamps"]], col_types = cols_only(timestamp = col_double()))
|
||||||
|
|
||||||
processed_locations <- locations %>%
|
processed_locations <- locations %>%
|
||||||
distinct(timestamp, .keep_all = TRUE) %>%
|
distinct(timestamp, .keep_all = TRUE) %>%
|
||||||
bind_rows(phone_sensed_timestamps) %>%
|
bind_rows(phone_sensed_timestamps) %>%
|
||||||
|
|
Loading…
Reference in New Issue