Refactor valid sensed bins and days

pull/95/head
JulioV 2020-07-09 13:01:50 -04:00
parent 64406a4316
commit d2704296f3
11 changed files with 104 additions and 95 deletions

View File

@ -13,6 +13,17 @@ files_to_compute = []
if len(config["PIDS"]) == 0:
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"]:
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]))
if config["MESSAGES"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
@ -26,8 +37,11 @@ if config["CALLS"]["COMPUTE"]:
if config["BARNETT_LOCATION"]["COMPUTE"]:
# TODO add files_to_compute.extend(optional_location_input(None))
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED" and config["BARNETT_LOCATION"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
if config["BARNETT_LOCATION"]["DB_TABLE"] in config[""]["TABLES"]:
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
else:
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/location_barnett_{segment}.csv", pid=config["PIDS"], segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]))
@ -49,8 +63,10 @@ if config["BATTERY"]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/{pid}/battery_{day_segment}.csv", pid = config["PIDS"], day_segment = config["BATTERY"]["DAY_SEGMENTS"]))
if config["SCREEN"]["COMPUTE"]:
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["TABLES"]:
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
else:
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))

View File

@ -1,7 +1,3 @@
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
TABLES_FOR_SENSED_BINS: []
# Participants to include in the analysis
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
PIDS: [test01]
@ -31,6 +27,18 @@ DOWNLOAD_DATASET:
READABLE_DATETIME:
FIXED_TIMEZONE: *timezone
PHONE_VALID_SENSED_BINS:
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
BIN_SIZE: 5 # (in minutes)
# Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
TABLES: []
PHONE_VALID_SENSED_DAYS:
COMPUTE: False
MIN_VALID_HOURS_PER_DAY: 16 # (out of 24) MIN_HOURS_PER_DAY
MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins)
# Communication SMS features config, TYPES and FEATURES keys need to match
MESSAGES:
COMPUTE: False
@ -58,11 +66,6 @@ APPLICATION_GENRES:
UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PHONE_VALID_SENSED_DAYS:
BIN_SIZE: 5 # (in minutes)
MIN_VALID_HOURS: 20 # (out of 24)
MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins)
RESAMPLE_FUSED_LOCATION:
CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row

View File

@ -10,9 +10,8 @@ RAPIDS Features
COMPUTE: True
DB_TABLE: messages
...
If you want to extract phone_valid_sensed_days.csv, screen features or locaton features based on fused location data don't forget to configure ``TABLES_FOR_SENSED_BINS`` (see below).
If you want to extract phone_valid_sensed_days.csv, screen features or locaton features based on fused location data don't forget to configure ``[PHONE_VALID_SENSED_BINS][TABLES]`` (see below).
.. _global-sensor-doc:
@ -21,11 +20,9 @@ Global Parameters
.. _sensor-list:
- ``TABLES_FOR_SENSED_BINS`` - Add as many sensor tables as you have in your database. All sensors included are used to compute ``phone_sensed_bins.csv`` (bins of time when the smartphone was sensing data). In turn, these bins are used to compute ``PHONE_VALID_SENSED_DAYS`` (see below), ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. See TABLES_FOR_SENSED_BINS_ variable in ``config`` file (therefore, when you are extracting screen or Barnett's location features, screen and locations tables are mandatory).
.. _pid:
- ``PID`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details<db-configuration>`).
- ``PIDS`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details<db-configuration>`).
.. _day-segments:
@ -52,19 +49,24 @@ Global Parameters
- ``FIXED_TIMEZONE``. See ``TIMEZONE`` above. This assumes that all data of all participants was collected within one time zone.
- Support for multiple time zones for each participant coming soon based on the ``timezone`` table collected by Aware.
- ``PHONE_VALID_SENSED_BINS``
Contains three attributes: ``COMPUTE``, ``BIN_SIZE`` and ``TABLES``. See the PHONE_VALID_SENSED_BINS_ section in the ``config.yaml`` file
Set the ``COMPUTE`` flag to True if you want to get this file (``data/interim/{pid}/phone_sensed_bins``). Phone valid sensed bins is a matrix of days x bins where we divide every hour of every day into N bins of size ``BIN_SIZE`` (in minutes). Each bin contains the number of rows that were recorded in that interval by all the sensors listed in ``TABLES``. Add as many sensor tables to ``TABLES`` as you have in your database because valid sensed bins are used to compute ``PHONE_VALID_SENSED_DAYS`` :ref:`PHONE_VALID_SENSED_BINS<phone-valid-sensed-days>`, ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``.
The ``COMPUTE`` flag is automatically ignored (set internally to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features.
.. _phone-valid-sensed-days:
- ``PHONE_VALID_SENSED_DAYS``.
Contains three attributes: ``BIN_SIZE``, ``MIN_VALID_HOURS``, ``MIN_BINS_PER_HOUR``.
Contains three attributes: ``COMPUTE``, ``MIN_VALID_HOURS_PER_DAY``, ``MIN_VALID_BINS_PER_HOUR``. See the PHONE_VALID_SENSED_DAYS_ section in ``config.yaml``.
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data (for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded.
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data, for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded.
Therefore, we define a valid hour as those that contain a minimum number of valid bins. In turn, a valid bin are those that contain at least one row of data from any sensor logged within that period. We divide an hour into N bins of size ``BIN_SIZE`` (in minutes) and we mark an hour as valid if contains at least ``MIN_BINS_PER_HOUR`` (out of the total possible number of bins that can be captured in an hour based on their length i.e. 60min/``BIN_SIZE`` bins). Days with valid sensed hours less than ``MIN_VALID_HOURS`` will be excluded form the output of this file. See PHONE_VALID_SENSED_DAYS_ in ``config.yaml``.
Therefore, we define a valid hour as those that contain a minimum number of valid bins. A valid bin are those that contain at least one row of data from any sensor logged within that period (See ``PHONE_VALID_SENSED_BINS`` above). We mark an hour as valid if contains at least ``MIN_VALID_BINS_PER_HOUR`` (out of the total possible number of bins that can be captured in an hour based on their length i.e. 60min/``BIN_SIZE`` bins). In turn, we mark a day as valid if it has at least ``MIN_VALID_HOURS_PER_DAY``.
Note that RAPIDS *DOES NOT* filter your feature files automatically, you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``.
You can get access to every phone's sensed bins matrix (days x bins) in ``data/interim/{pid}/phone_sensed_bins.csv``. As mentioned above, RAPIDS uses this file to compute ``phone_valid_sensed_days.csv``, ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``.
Note that RAPIDS *DOES NOT* filter your feature files automatically, you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``.
.. _individual-sensor-settings:
@ -969,31 +971,31 @@ Active and sedentary bouts. If the step count per minute is smaller than ``THRES
.. -------------------------Links ------------------------------------ ..
.. _TABLES_FOR_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L3
.. _`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L35
.. _PHONE_VALID_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L30
.. _`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L43
.. _AWARE: https://awareframework.com/what-is-aware/
.. _`List of Timezones`: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
.. _DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L10
.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L61
.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L45
.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L169
.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L81
.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L115
.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L125
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L95
.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L87
.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L109
.. _`Location (Barnetts) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L71
.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L101
.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L162
.. _DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L6
.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L37
.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L53
.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L172
.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L84
.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L118
.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L128
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L98
.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L90
.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L112
.. _`Location (Barnetts) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L74
.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L104
.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L165
.. _`version 1`: https://dev.fitbit.com/build/reference/web-api/sleep-v1/
.. _`version 1.2`: https://dev.fitbit.com/build/reference/web-api/sleep/
.. _`Conversation Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L191
.. _`this Fitbit forum post`: https://community.fitbit.com/t5/Alta/What-does-Restless-mean-in-sleep-tracking/td-p/2989011
.. _shortData: https://dev.fitbit.com/build/reference/web-api/sleep/#interpreting-the-sleep-stage-and-short-data
.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L138
.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L145
.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L141
.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L148
.. _`Fitbit documentation`: https://help.fitbit.com/articles/en_US/Help_article/1565
.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L108
.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L136
.. _`Beiwe Summary Statistics`: http://wiki.beiwe.org/wiki/Summary_Statistics
.. _`Pause-Flight Model`: https://academic.oup.com/biostatistics/advance-article/doi/10.1093/biostatistics/kxy059/5145908

View File

@ -50,7 +50,7 @@ This is expected behavior. The advantage of using ``snakemake`` under the hood i
Execution halted
**Solution:**
Please make sure the sensors listed in ``TABLES_FOR_SENSED_BINS`` and each sensor section you activated in ``config.yaml`` match your database tables.
Please make sure the sensors listed in ``[PHONE_VALID_SENSED_BINS][TABLES]`` and each sensor section you activated in ``config.yaml`` match your database tables.

View File

@ -10,7 +10,7 @@ The ``config.yaml`` File
RAPIDS configuration settings are defined in ``config.yaml`` (See `config.yaml`_). This is the only file that you need to understand in order to compute the features that RAPIDS ships with.
It has global settings like ``TABLES_FOR_SENSED_BINS``, ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information). As well as per sensor settings, for example, for the :ref:`messages-sensor-doc`::
It has global settings like ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information). As well as per sensor settings, for example, for the :ref:`messages-sensor-doc`::
| ``MESSAGES:``
| ``COMPUTE: True``

View File

@ -157,7 +157,7 @@ rule screen_features:
reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"],
features_deltas = config["SCREEN"]["FEATURES_DELTAS"],
episode_types = config["SCREEN"]["EPISODE_TYPES"],
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output:
"data/processed/{pid}/screen_{day_segment}.csv"
script:

View File

@ -37,27 +37,27 @@ rule readable_datetime:
script:
"../src/data/readable_datetime.R"
rule phone_sensed_bins:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["PHONE_VALID_SENSED_BINS"]["TABLES"])
params:
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output:
"data/interim/{pid}/phone_sensed_bins.csv"
script:
"../src/data/phone_sensed_bins.R"
rule phone_valid_sensed_days:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv"
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"],
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
min_valid_hours_per_day = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
min_valid_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
output:
"data/interim/{pid}/phone_valid_sensed_days.csv"
script:
"../src/data/phone_valid_sensed_days.R"
rule phone_sensed_bins:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
output:
"data/interim/{pid}/phone_sensed_bins.csv"
script:
"../src/data/phone_sensed_bins.R"
rule unify_ios_android:
input:
@ -76,7 +76,7 @@ rule resample_fused_location:
locations = "data/raw/{pid}/{sensor}_raw.csv",
phone_sensed_bins = rules.phone_sensed_bins.output
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
timezone = config["RESAMPLE_FUSED_LOCATION"]["TIMEZONE"],
consecutive_threshold = config["RESAMPLE_FUSED_LOCATION"]["CONSECUTIVE_THRESHOLD"],
time_since_valid_location = config["RESAMPLE_FUSED_LOCATION"]["TIME_SINCE_VALID_LOCATION"]

View File

@ -5,7 +5,7 @@ rule heatmap_rows:
params:
table = "{sensor}",
pid = "{pid}",
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output:
"reports/figures/{pid}/{sensor}_heatmap_rows.html"
script:
@ -17,7 +17,7 @@ rule compliance_heatmap:
pid_file = "data/external/{pid}"
params:
pid = "{pid}",
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output:
"reports/figures/{pid}/compliance_heatmap.html"
script:
@ -30,8 +30,8 @@ rule overall_compliance_heatmap:
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
params:
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
output:
"reports/figures/overall_compliance_heatmap.html"
script:

View File

@ -1,31 +1,20 @@
source("renv/activate.R")
library("dplyr")
library("tidyr")
library(dplyr)
all_sensors <- snakemake@input[["all_sensors"]]
bin_size <- snakemake@params[["bin_size"]]
min_valid_hours <- snakemake@params[["min_valid_hours"]]
min_bins_per_hour <- snakemake@params[["min_bins_per_hour"]]
phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]])
min_valid_hours_per_day <- snakemake@params[["min_valid_hours_per_day"]]
min_valid_bins_per_hour <- snakemake@params[["min_valid_bins_per_hour"]]
output_file <- snakemake@output[[1]]
# Load all sensors and extract timestamps
all_sensor_data <- data.frame(timestamp = c())
for(sensor in all_sensors){
sensor_data <- read.csv(sensor, stringsAsFactors = F) %>% select(local_date, local_hour, local_minute)
all_sensor_data <- rbind(all_sensor_data, sensor_data)
}
phone_valid_sensed_days <- all_sensor_data %>%
mutate(bin = (local_minute %/% bin_size) * bin_size) %>% # bin rows into bin_size-minute bins
group_by(local_date, local_hour, bin) %>%
summarise(minute_period = first(bin)) %>% #filter repeated bins (if rows were logged within bin_size minutes)
ungroup() %>%
group_by(local_date, local_hour) %>%
summarise(bins = n()) %>% # Count how many bins there are per hour
ungroup() %>%
filter(bins >= min_bins_per_hour) %>% # Discard those hours where there were fewer than min_bins_per_hour
group_by(local_date) %>%
summarise(valid_hours = n()) %>% # Count how many valid hours each day has
filter(valid_hours >= min_valid_hours) # Discard those days where there were fewer than min_valid_hours
phone_valid_sensed_days <- phone_sensed_bins %>%
pivot_longer(cols = -local_date, names_to = c("hour", "bin"), names_sep = "_") %>%
filter(value > 0) %>%
group_by(local_date, hour) %>%
summarise(valid_bins = n()) %>%
filter(valid_bins >= min_valid_bins_per_hour) %>%
group_by(local_date) %>%
summarise(valid_sensed_hours = n()) %>%
mutate(is_valid_sensed_day = ifelse(valid_sensed_hours >= min_valid_hours_per_day, TRUE, FALSE))
write.csv(phone_valid_sensed_days, output_file, row.names = FALSE)

View File

@ -23,8 +23,8 @@ if config["CALLS"]["COMPUTE"]:
files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
if config["SCREEN"]["COMPUTE"]:
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
if config["SCREEN"]["DB_TABLE"] not in config["PHONE_VALID_SENSED_BINS"]["TABLES"]:
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))

View File

@ -1,7 +1,3 @@
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
TABLES_FOR_SENSED_BINS: [messages, calls, screen, battery, bluetooth, wifi]
# Participants to include in the analysis
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
PIDS: [test01, test02, test03, test04]
@ -11,6 +7,9 @@ PIDS: [test01, test02, test03, test04]
DAY_SEGMENTS: &day_segments
[daily, morning, afternoon, evening, night]
PHONE_VALID_SENSED_BINS:
TABLES: [messages, calls, screen, battery, bluetooth, wifi]
# Communication SMS features config, TYPES and FEATURES keys need to match
MESSAGES:
COMPUTE: True