Refactor valid sensed bins and days
parent
64406a4316
commit
d2704296f3
24
Snakefile
24
Snakefile
|
@ -13,6 +13,17 @@ files_to_compute = []
|
|||
if len(config["PIDS"]) == 0:
|
||||
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
||||
|
||||
if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"]:
|
||||
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
|
||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
|
||||
if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
|
||||
if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0:
|
||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml")
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]))
|
||||
|
||||
if config["MESSAGES"]["COMPUTE"]:
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
|
||||
|
@ -26,8 +37,11 @@ if config["CALLS"]["COMPUTE"]:
|
|||
|
||||
if config["BARNETT_LOCATION"]["COMPUTE"]:
|
||||
# TODO add files_to_compute.extend(optional_location_input(None))
|
||||
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED" and config["BARNETT_LOCATION"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
|
||||
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
|
||||
if config["BARNETT_LOCATION"]["DB_TABLE"] in config[""]["TABLES"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/location_barnett_{segment}.csv", pid=config["PIDS"], segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]))
|
||||
|
@ -49,8 +63,10 @@ if config["BATTERY"]["COMPUTE"]:
|
|||
files_to_compute.extend(expand("data/processed/{pid}/battery_{day_segment}.csv", pid = config["PIDS"], day_segment = config["BATTERY"]["DAY_SEGMENTS"]))
|
||||
|
||||
if config["SCREEN"]["COMPUTE"]:
|
||||
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["TABLES"]:
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||
else:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
|
|
21
config.yaml
21
config.yaml
|
@ -1,7 +1,3 @@
|
|||
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
TABLES_FOR_SENSED_BINS: []
|
||||
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
||||
PIDS: [test01]
|
||||
|
@ -31,6 +27,18 @@ DOWNLOAD_DATASET:
|
|||
READABLE_DATETIME:
|
||||
FIXED_TIMEZONE: *timezone
|
||||
|
||||
PHONE_VALID_SENSED_BINS:
|
||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
||||
BIN_SIZE: 5 # (in minutes)
|
||||
# Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
TABLES: []
|
||||
|
||||
PHONE_VALID_SENSED_DAYS:
|
||||
COMPUTE: False
|
||||
MIN_VALID_HOURS_PER_DAY: 16 # (out of 24)
|
||||
MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins)
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
MESSAGES:
|
||||
COMPUTE: False
|
||||
|
@ -58,11 +66,6 @@ APPLICATION_GENRES:
|
|||
UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||
SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||
|
||||
PHONE_VALID_SENSED_DAYS:
|
||||
BIN_SIZE: 5 # (in minutes)
|
||||
MIN_VALID_HOURS: 20 # (out of 24)
|
||||
MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins)
|
||||
|
||||
RESAMPLE_FUSED_LOCATION:
|
||||
CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
|
|
|
@ -11,8 +11,7 @@ RAPIDS Features
|
|||
DB_TABLE: messages
|
||||
...
|
||||
|
||||
|
||||
If you want to extract phone_valid_sensed_days.csv, screen features or locaton features based on fused location data don't forget to configure ``TABLES_FOR_SENSED_BINS`` (see below).
|
||||
If you want to extract phone_valid_sensed_days.csv, screen features or location features based on fused location data, don't forget to configure ``[PHONE_VALID_SENSED_BINS][TABLES]`` (see below).
|
||||
|
||||
.. _global-sensor-doc:
|
||||
|
||||
|
@ -21,11 +20,9 @@ Global Parameters
|
|||
|
||||
.. _sensor-list:
|
||||
|
||||
- ``TABLES_FOR_SENSED_BINS`` - Add as many sensor tables as you have in your database. All sensors included are used to compute ``phone_sensed_bins.csv`` (bins of time when the smartphone was sensing data). In turn, these bins are used to compute ``PHONE_VALID_SENSED_DAYS`` (see below), ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. See TABLES_FOR_SENSED_BINS_ variable in ``config`` file (therefore, when you are extracting screen or Barnett's location features, screen and locations tables are mandatory).
|
||||
|
||||
.. _pid:
|
||||
|
||||
- ``PID`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details<db-configuration>`).
|
||||
- ``PIDS`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details<db-configuration>`).
|
||||
|
||||
.. _day-segments:
|
||||
|
||||
|
@ -52,20 +49,25 @@ Global Parameters
|
|||
- ``FIXED_TIMEZONE``. See ``TIMEZONE`` above. This assumes that all data of all participants was collected within one time zone.
|
||||
- Support for multiple time zones for each participant coming soon based on the ``timezone`` table collected by Aware.
|
||||
|
||||
- ``PHONE_VALID_SENSED_BINS``
|
||||
Contains three attributes: ``COMPUTE``, ``BIN_SIZE`` and ``TABLES``. See the PHONE_VALID_SENSED_BINS_ section in the ``config.yaml`` file
|
||||
|
||||
Set the ``COMPUTE`` flag to True if you want to get this file (``data/interim/{pid}/phone_sensed_bins.csv``). Phone valid sensed bins is a matrix of days x bins where we divide every hour of every day into N bins of size ``BIN_SIZE`` (in minutes). Each bin contains the number of rows that were recorded in that interval by all the sensors listed in ``TABLES``. Add as many sensor tables to ``TABLES`` as you have in your database because valid sensed bins are used to compute :ref:`PHONE_VALID_SENSED_DAYS<phone-valid-sensed-days>`, the ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``.
|
||||
|
||||
The ``COMPUTE`` flag is automatically ignored (set internally to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features.
|
||||
|
||||
.. _phone-valid-sensed-days:
|
||||
|
||||
- ``PHONE_VALID_SENSED_DAYS``.
|
||||
|
||||
Contains three attributes: ``BIN_SIZE``, ``MIN_VALID_HOURS``, ``MIN_BINS_PER_HOUR``.
|
||||
Contains three attributes: ``COMPUTE``, ``MIN_VALID_HOURS_PER_DAY``, ``MIN_VALID_BINS_PER_HOUR``. See the PHONE_VALID_SENSED_DAYS_ section in ``config.yaml``.
|
||||
|
||||
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data (for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded.
|
||||
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data. For example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded.
|
||||
|
||||
Therefore, we define a valid hour as those that contain a minimum number of valid bins. In turn, a valid bin are those that contain at least one row of data from any sensor logged within that period. We divide an hour into N bins of size ``BIN_SIZE`` (in minutes) and we mark an hour as valid if contains at least ``MIN_BINS_PER_HOUR`` (out of the total possible number of bins that can be captured in an hour based on their length i.e. 60min/``BIN_SIZE`` bins). Days with valid sensed hours less than ``MIN_VALID_HOURS`` will be excluded form the output of this file. See PHONE_VALID_SENSED_DAYS_ in ``config.yaml``.
|
||||
Therefore, we define a valid hour as one that contains a minimum number of valid bins. A valid bin is one that contains at least one row of data from any sensor logged within that period (see ``PHONE_VALID_SENSED_BINS`` above). We mark an hour as valid if it contains at least ``MIN_VALID_BINS_PER_HOUR`` valid bins (out of the total possible number of bins that can be captured in an hour based on their length, i.e. 60min/``BIN_SIZE`` bins). In turn, we mark a day as valid if it has at least ``MIN_VALID_HOURS_PER_DAY`` valid hours.
|
||||
|
||||
Note that RAPIDS *DOES NOT* filter your feature files automatically, you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``.
|
||||
|
||||
You can get access to every phone's sensed bins matrix (days x bins) in ``data/interim/{pid}/phone_sensed_bins.csv``. As mentioned above, RAPIDS uses this file to compute ``phone_valid_sensed_days.csv``, ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``.
|
||||
|
||||
.. _individual-sensor-settings:
|
||||
|
||||
|
||||
|
@ -969,31 +971,31 @@ Active and sedentary bouts. If the step count per minute is smaller than ``THRES
|
|||
|
||||
.. -------------------------Links ------------------------------------ ..
|
||||
|
||||
.. _TABLES_FOR_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L3
|
||||
.. _`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L35
|
||||
.. _PHONE_VALID_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L30
|
||||
.. _`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L43
|
||||
.. _AWARE: https://awareframework.com/what-is-aware/
|
||||
.. _`List of Timezones`: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
|
||||
.. _DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L10
|
||||
.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L61
|
||||
.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L45
|
||||
.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L169
|
||||
.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L81
|
||||
.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L115
|
||||
.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L125
|
||||
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L95
|
||||
.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L87
|
||||
.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L109
|
||||
.. _`Location (Barnett’s) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L71
|
||||
.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L101
|
||||
.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L162
|
||||
.. _DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L6
|
||||
.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L37
|
||||
.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L53
|
||||
.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L172
|
||||
.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L84
|
||||
.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L118
|
||||
.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L128
|
||||
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L98
|
||||
.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L90
|
||||
.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L112
|
||||
.. _`Location (Barnett’s) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L74
|
||||
.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L104
|
||||
.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L165
|
||||
.. _`version 1`: https://dev.fitbit.com/build/reference/web-api/sleep-v1/
|
||||
.. _`version 1.2`: https://dev.fitbit.com/build/reference/web-api/sleep/
|
||||
.. _`Conversation Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L191
|
||||
.. _`this Fitbit forum post`: https://community.fitbit.com/t5/Alta/What-does-Restless-mean-in-sleep-tracking/td-p/2989011
|
||||
.. _shortData: https://dev.fitbit.com/build/reference/web-api/sleep/#interpreting-the-sleep-stage-and-short-data
|
||||
.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L138
|
||||
.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L145
|
||||
.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L141
|
||||
.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L148
|
||||
.. _`Fitbit documentation`: https://help.fitbit.com/articles/en_US/Help_article/1565
|
||||
.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L108
|
||||
.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L136
|
||||
.. _`Beiwe Summary Statistics`: http://wiki.beiwe.org/wiki/Summary_Statistics
|
||||
.. _`Pause-Flight Model`: https://academic.oup.com/biostatistics/advance-article/doi/10.1093/biostatistics/kxy059/5145908
|
||||
|
|
|
@ -50,7 +50,7 @@ This is expected behavior. The advantage of using ``snakemake`` under the hood i
|
|||
Execution halted
|
||||
|
||||
**Solution:**
|
||||
Please make sure the sensors listed in ``TABLES_FOR_SENSED_BINS`` and each sensor section you activated in ``config.yaml`` match your database tables.
|
||||
Please make sure the sensors listed in ``[PHONE_VALID_SENSED_BINS][TABLES]`` and each sensor section you activated in ``config.yaml`` match your database tables.
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ The ``config.yaml`` File
|
|||
|
||||
RAPIDS configuration settings are defined in ``config.yaml`` (See `config.yaml`_). This is the only file that you need to understand in order to compute the features that RAPIDS ships with.
|
||||
|
||||
It has global settings like ``TABLES_FOR_SENSED_BINS``, ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information). As well as per sensor settings, for example, for the :ref:`messages-sensor-doc`::
|
||||
It has global settings like ``PIDS`` and ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information), as well as per-sensor settings; for example, for the :ref:`messages-sensor-doc`::
|
||||
|
||||
| ``MESSAGES:``
|
||||
| ``COMPUTE: True``
|
||||
|
|
|
@ -157,7 +157,7 @@ rule screen_features:
|
|||
reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"],
|
||||
features_deltas = config["SCREEN"]["FEATURES_DELTAS"],
|
||||
episode_types = config["SCREEN"]["EPISODE_TYPES"],
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
"data/processed/{pid}/screen_{day_segment}.csv"
|
||||
script:
|
||||
|
|
|
@ -37,27 +37,27 @@ rule readable_datetime:
|
|||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
rule phone_sensed_bins:
|
||||
input:
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["PHONE_VALID_SENSED_BINS"]["TABLES"])
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_sensed_bins.csv"
|
||||
script:
|
||||
"../src/data/phone_sensed_bins.R"
|
||||
|
||||
rule phone_valid_sensed_days:
|
||||
input:
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
|
||||
phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv"
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
|
||||
min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"],
|
||||
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
|
||||
min_valid_hours_per_day = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
|
||||
min_valid_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_valid_sensed_days.csv"
|
||||
script:
|
||||
"../src/data/phone_valid_sensed_days.R"
|
||||
|
||||
rule phone_sensed_bins:
|
||||
input:
|
||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_sensed_bins.csv"
|
||||
script:
|
||||
"../src/data/phone_sensed_bins.R"
|
||||
|
||||
rule unify_ios_android:
|
||||
input:
|
||||
|
@ -76,7 +76,7 @@ rule resample_fused_location:
|
|||
locations = "data/raw/{pid}/{sensor}_raw.csv",
|
||||
phone_sensed_bins = rules.phone_sensed_bins.output
|
||||
params:
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
|
||||
timezone = config["RESAMPLE_FUSED_LOCATION"]["TIMEZONE"],
|
||||
consecutive_threshold = config["RESAMPLE_FUSED_LOCATION"]["CONSECUTIVE_THRESHOLD"],
|
||||
time_since_valid_location = config["RESAMPLE_FUSED_LOCATION"]["TIME_SINCE_VALID_LOCATION"]
|
||||
|
|
|
@ -5,7 +5,7 @@ rule heatmap_rows:
|
|||
params:
|
||||
table = "{sensor}",
|
||||
pid = "{pid}",
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
"reports/figures/{pid}/{sensor}_heatmap_rows.html"
|
||||
script:
|
||||
|
@ -17,7 +17,7 @@ rule compliance_heatmap:
|
|||
pid_file = "data/external/{pid}"
|
||||
params:
|
||||
pid = "{pid}",
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
||||
output:
|
||||
"reports/figures/{pid}/compliance_heatmap.html"
|
||||
script:
|
||||
|
@ -30,8 +30,8 @@ rule overall_compliance_heatmap:
|
|||
pid_files = expand("data/external/{pid}", pid=config["PIDS"])
|
||||
params:
|
||||
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
|
||||
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
|
||||
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"]
|
||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"],
|
||||
min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]
|
||||
output:
|
||||
"reports/figures/overall_compliance_heatmap.html"
|
||||
script:
|
||||
|
|
|
@ -1,31 +1,20 @@
|
|||
source("renv/activate.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
library(dplyr)
|
||||
|
||||
all_sensors <- snakemake@input[["all_sensors"]]
|
||||
bin_size <- snakemake@params[["bin_size"]]
|
||||
min_valid_hours <- snakemake@params[["min_valid_hours"]]
|
||||
min_bins_per_hour <- snakemake@params[["min_bins_per_hour"]]
|
||||
phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]])
|
||||
min_valid_hours_per_day <- snakemake@params[["min_valid_hours_per_day"]]
|
||||
min_valid_bins_per_hour <- snakemake@params[["min_valid_bins_per_hour"]]
|
||||
output_file <- snakemake@output[[1]]
|
||||
|
||||
# Load all sensors and extract timestamps
|
||||
all_sensor_data <- data.frame(timestamp = c())
|
||||
for(sensor in all_sensors){
|
||||
sensor_data <- read.csv(sensor, stringsAsFactors = F) %>% select(local_date, local_hour, local_minute)
|
||||
all_sensor_data <- rbind(all_sensor_data, sensor_data)
|
||||
}
|
||||
|
||||
phone_valid_sensed_days <- all_sensor_data %>%
|
||||
mutate(bin = (local_minute %/% bin_size) * bin_size) %>% # bin rows into bin_size-minute bins
|
||||
group_by(local_date, local_hour, bin) %>%
|
||||
summarise(minute_period = first(bin)) %>% #filter repeated bins (if rows were logged within bin_size minutes)
|
||||
ungroup() %>%
|
||||
group_by(local_date, local_hour) %>%
|
||||
summarise(bins = n()) %>% # Count how many bins there are per hour
|
||||
ungroup() %>%
|
||||
filter(bins >= min_bins_per_hour) %>% # Discard those hours where there were fewer than min_bins_per_hour
|
||||
phone_valid_sensed_days <- phone_sensed_bins %>%
|
||||
pivot_longer(cols = -local_date, names_to = c("hour", "bin"), names_sep = "_") %>%
|
||||
filter(value > 0) %>%
|
||||
group_by(local_date, hour) %>%
|
||||
summarise(valid_bins = n()) %>%
|
||||
filter(valid_bins >= min_valid_bins_per_hour) %>%
|
||||
group_by(local_date) %>%
|
||||
summarise(valid_hours = n()) %>% # Count how many valid hours each day has
|
||||
filter(valid_hours >= min_valid_hours) # Discard those days where there were fewer than min_valid_hours
|
||||
summarise(valid_sensed_hours = n()) %>%
|
||||
mutate(is_valid_sensed_day = ifelse(valid_sensed_hours >= min_valid_hours_per_day, TRUE, FALSE))
|
||||
|
||||
write.csv(phone_valid_sensed_days, output_file, row.names = FALSE)
|
||||
|
|
|
@ -23,8 +23,8 @@ if config["CALLS"]["COMPUTE"]:
|
|||
files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
|
||||
|
||||
if config["SCREEN"]["COMPUTE"]:
|
||||
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
if config["SCREEN"]["DB_TABLE"] not in config["PHONE_VALID_SENSED_BINS"]["TABLES"]:
|
||||
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
|
||||
|
|
|
@ -1,7 +1,3 @@
|
|||
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
|
||||
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
|
||||
TABLES_FOR_SENSED_BINS: [messages, calls, screen, battery, bluetooth, wifi]
|
||||
|
||||
# Participants to include in the analysis
|
||||
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
|
||||
PIDS: [test01, test02, test03, test04]
|
||||
|
@ -11,6 +7,9 @@ PIDS: [test01, test02, test03, test04]
|
|||
DAY_SEGMENTS: &day_segments
|
||||
[daily, morning, afternoon, evening, night]
|
||||
|
||||
PHONE_VALID_SENSED_BINS:
|
||||
TABLES: [messages, calls, screen, battery, bluetooth, wifi]
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
MESSAGES:
|
||||
COMPUTE: True
|
||||
|
|
Loading…
Reference in New Issue