Refactor location_barnett features: replace "metrics" with "features"
Co-authored-by: Meng Li <AnnieLM1996@gmail.com>pull/95/head
parent
fa67359b5f
commit
2d7d3bfccf
|
@ -69,7 +69,7 @@ RESAMPLE_FUSED_LOCATION:
|
|||
|
||||
BARNETT_LOCATION:
|
||||
DAY_SEGMENTS: [daily] # These features are only available on a daily basis
|
||||
METRICS: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
|
||||
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
TIMEZONE: *timezone
|
||||
|
|
|
@ -759,7 +759,7 @@ stdlux lux The standard deviation of ambient luminance in lux u
|
|||
Location (Barnett’s) Features
|
||||
""""""""""""""""""""""""""""""
|
||||
Barnett’s location features are based on the concept of flights and pauses. GPS coordinates are converted into a
|
||||
sequence of flights (straight line movements) and pauses (time spent stationary). Data is imputed before metrics
|
||||
sequence of flights (straight line movements) and pauses (time spent stationary). Data is imputed before features
|
||||
are computed (https://arxiv.org/abs/1606.06328)
|
||||
|
||||
See `Location (Barnett’s) Config Code`_
|
||||
|
@ -779,7 +779,7 @@ See `Location (Barnett’s) Config Code`_
|
|||
|
||||
.. - Apply readable datetime to Sensor dataset: ``expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),``
|
||||
|
||||
- Extract Sensor Metrics: ``expand("data/processed/{pid}/location_barnett.csv", pid=config["PIDS"]),``
|
||||
- Extract Sensor Features: ``expand("data/processed/{pid}/location_barnett.csv", pid=config["PIDS"]),``
|
||||
|
||||
**Rule Chain:**
|
||||
|
||||
|
@ -799,9 +799,9 @@ See `Location (Barnett’s) Config Code`_
|
|||
|
||||
- **Script:** ``src/data/resample_fused_location.R`` - See the resample_fused_location.R_ script.
|
||||
|
||||
- **Rule:** ``rules/features.snakefile/location_barnett_metrics`` - See the location_barnett_metrics_ rule.
|
||||
- **Rule:** ``rules/features.snakefile/location_barnett_features`` - See the location_barnett_features_ rule.
|
||||
|
||||
- **Script:** ``src/features/location_barnett_metrics.R`` - See the location_barnett_metrics.R_ script.
|
||||
- **Script:** ``src/features/location_barnett_features.R`` - See the location_barnett_features.R_ script.
|
||||
|
||||
|
||||
.. _location-parameters:
|
||||
|
@ -814,14 +814,14 @@ Name Description
|
|||
locations_to_use Specifies which of the location data will be used in the analysis. Possible options are ``ALL``, ``ALL_EXCEPT_FUSED`` or ``RESAMPLE_FUSED``
|
||||
accuracy_limit This is in meters. The sensor drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within the specified radius.
|
||||
timezone The timezone used to calculate location.
|
||||
metrics The different measures that can be retrieved from the Location dataset. See :ref:`Available Location Metrics <location-available-metrics>` Table below
|
||||
features The different measures that can be retrieved from the Location dataset. See :ref:`Available Location Features <location-available-features>` Table below
|
||||
================= ===================
|
||||
|
||||
.. _location-available-metrics:
|
||||
.. _location-available-features:
|
||||
|
||||
**Available Location Metrics**
|
||||
**Available Location Features**
|
||||
|
||||
The following table shows a list of the available metrics for Location dataset.
|
||||
The following table shows a list of the available features for the Location dataset.
|
||||
|
||||
================ ========= =============
|
||||
Name Units Description
|
||||
|
@ -839,7 +839,7 @@ stdflightdur meters Std flight duration. The standard deviation of
|
|||
probpause Pause probability. The fraction of a day spent in a pause (as opposed to a flight)
|
||||
siglocentropy Significant location entropy. Entropy measurement based on the proportion of time spent at each significant location visited during a day.
|
||||
minsmissing
|
||||
circdnrtn Circadian routine. A continuous metric that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day.
|
||||
circdnrtn Circadian routine. A continuous feature that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day.
|
||||
wkenddayrtn Weekend circadian routine. Same as Circadian routine but computed separately for weekends and weekdays.
|
||||
================ ========= =============
|
||||
|
||||
|
@ -1102,7 +1102,7 @@ See `Fitbit: Steps Config Code`_
|
|||
Name Description
|
||||
======================= ===================
|
||||
day_segment The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
|
||||
features The different measures that can be retrieved from the dataset. See :ref:`Available Fitbit: Steps Metrics <fitbit-steps-available-metrics>` Table below
|
||||
features The different measures that can be retrieved from the dataset. See :ref:`Available Fitbit: Steps Features <fitbit-steps-available-features>` Table below
|
||||
threshold_active_bout The maximum number of steps per minute necessary for a bout to be ``sedentary``. That is, if the step count per minute is greater than this value the bout has a status of ``active``.
|
||||
======================= ===================
|
||||
|
||||
|
@ -1182,8 +1182,8 @@ stddurationactivebout minutes Std duration active bout: The standard
|
|||
.. _phone_sensed_bins.R: https://github.com/carissalow/rapids/blob/master/src/data/phone_sensed_bins.R
|
||||
.. _resample_fused_location: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L67
|
||||
.. _resample_fused_location.R: https://github.com/carissalow/rapids/blob/master/src/data/resample_fused_location.R
|
||||
.. _location_barnett_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L49
|
||||
.. _location_barnett_metrics.R: https://github.com/carissalow/rapids/blob/master/src/features/location_barnett_metrics.R
|
||||
.. _location_barnett_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L49
|
||||
.. _location_barnett_features.R: https://github.com/carissalow/rapids/blob/master/src/features/location_barnett_features.R
|
||||
.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L88
|
||||
.. _screen_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L33
|
||||
.. _screen_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/screen_deltas.R
|
||||
|
|
|
@ -47,12 +47,12 @@ rule google_activity_recognition_deltas:
|
|||
script:
|
||||
"../src/features/google_activity_recognition_deltas.R"
|
||||
|
||||
rule location_barnett_metrics:
|
||||
rule location_barnett_features:
|
||||
input:
|
||||
raw = "data/raw/{pid}/locations_raw.csv",
|
||||
fused = rules.resample_fused_location.output
|
||||
params:
|
||||
metrics = config["BARNETT_LOCATION"]["METRICS"],
|
||||
features = config["BARNETT_LOCATION"]["FEATURES"],
|
||||
locations_to_use = config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"],
|
||||
accuracy_limit = config["BARNETT_LOCATION"]["ACCURACY_LIMIT"],
|
||||
timezone = config["BARNETT_LOCATION"]["TIMEZONE"],
|
||||
|
@ -60,7 +60,7 @@ rule location_barnett_metrics:
|
|||
output:
|
||||
"data/processed/{pid}/location_barnett_{day_segment}.csv"
|
||||
script:
|
||||
"../src/features/location_barnett_metrics.R"
|
||||
"../src/features/location_barnett_features.R"
|
||||
|
||||
rule bluetooth_features:
|
||||
input:
|
||||
|
|
|
@ -2,7 +2,7 @@ source("packrat/init.R")
|
|||
|
||||
library(dplyr)
|
||||
|
||||
write_empty_file <- function(file_path, metrics_to_include){
|
||||
write_empty_file <- function(file_path, requested_feature){
|
||||
write.csv(data.frame(local_date= character(),
|
||||
location_barnett_hometime= numeric(),
|
||||
location_barnett_disttravelled= numeric(),
|
||||
|
@ -19,7 +19,7 @@ write_empty_file <- function(file_path, metrics_to_include){
|
|||
location_barnett_minsmissing= numeric(),
|
||||
location_barnett_circdnrtn= numeric(),
|
||||
location_barnett_wkenddayrtn= numeric()
|
||||
) %>% select(metrics_to_include), file_path, row.names = F)
|
||||
) %>% select(requested_feature), file_path, row.names = F)
|
||||
}
|
||||
|
||||
# Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
|
||||
|
@ -29,9 +29,9 @@ sapply(file.sources,source,.GlobalEnv)
|
|||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
||||
timezone <- snakemake@params[["timezone"]]
|
||||
metrics_to_include <- intersect(unlist(snakemake@params["metrics"], use.names = F),
|
||||
requested_feature <- intersect(unlist(snakemake@params["features"], use.names = F),
|
||||
c("hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","minsmissing","circdnrtn","wkenddayrtn"))
|
||||
metrics_to_include <- c("local_date", paste("location_barnett", metrics_to_include, sep = "_"))
|
||||
requested_feature <- c("local_date", paste("location_barnett", requested_feature, sep = "_"))
|
||||
|
||||
# By default we use all raw locations: fused without resampling and not fused (gps, network)
|
||||
location <- read.csv(snakemake@input[["raw"]], stringsAsFactors = F) %>%
|
||||
|
@ -50,16 +50,16 @@ if(locations_to_use == "ALL_EXCEPT_FUSED"){
|
|||
if (nrow(location) > 1){
|
||||
features <- MobilityFeatures(location, ACCURACY_LIM = accuracy_limit, tz = timezone)
|
||||
if(is.null(features)){
|
||||
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||
write_empty_file(snakemake@output[[1]], requested_feature)
|
||||
} else{
|
||||
# Copy index (dates) as a column
|
||||
outmatrix <- cbind(rownames(features$featavg), features$featavg)
|
||||
outmatrix <- as.data.frame(outmatrix)
|
||||
outmatrix[-1] <- lapply(lapply(outmatrix[-1], as.character), as.numeric)
|
||||
colnames(outmatrix)=c("local_date",tolower(paste("location_barnett", colnames(features$featavg), sep = "_")))
|
||||
write.csv(outmatrix %>% select(metrics_to_include), snakemake@output[[1]], row.names = F)
|
||||
write.csv(outmatrix %>% select(requested_feature), snakemake@output[[1]], row.names = F)
|
||||
}
|
||||
|
||||
} else {
|
||||
write_empty_file(snakemake@output[[1]], metrics_to_include)
|
||||
write_empty_file(snakemake@output[[1]], requested_feature)
|
||||
}
|
Loading…
Reference in New Issue