diff --git a/Snakefile b/Snakefile index 1663be85..51b724e8 100644 --- a/Snakefile +++ b/Snakefile @@ -13,6 +13,17 @@ files_to_compute = [] if len(config["PIDS"]) == 0: raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external") +if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"]: + if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0: + raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml") + files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"])) + +if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: + if len(config["PHONE_VALID_SENSED_BINS"]["TABLES"]) == 0: + raise ValueError("If you want to compute PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml") + files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"])) + files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"])) + if config["MESSAGES"]["COMPUTE"]: files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"])) @@ -26,8 +37,11 @@ if config["CALLS"]["COMPUTE"]: if config["BARNETT_LOCATION"]["COMPUTE"]: # TODO add files_to_compute.extend(optional_location_input(None)) - if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED" and config["BARNETT_LOCATION"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]: - raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. 
This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)") + if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED": + if config["BARNETT_LOCATION"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["TABLES"]: + files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"])) + else: + raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)") files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"])) files_to_compute.extend(expand("data/processed/{pid}/location_barnett_{segment}.csv", pid=config["PIDS"], segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"])) @@ -49,8 +63,10 @@ if config["BATTERY"]["COMPUTE"]: files_to_compute.extend(expand("data/processed/{pid}/battery_{day_segment}.csv", pid = config["PIDS"], day_segment = config["BATTERY"]["DAY_SEGMENTS"])) if config["SCREEN"]["COMPUTE"]: - if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]: - raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. 
This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)") + if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["TABLES"]: + files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"])) + else: + raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)") files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"])) diff --git a/config.yaml b/config.yaml index 7f10ba82..df05a52b 100644 --- a/config.yaml +++ b/config.yaml @@ -1,7 +1,3 @@ -# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS. -# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory. -TABLES_FOR_SENSED_BINS: [] - # Participants to include in the analysis # You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically PIDS: [test01] @@ -31,6 +27,18 @@ DOWNLOAD_DATASET: READABLE_DATETIME: FIXED_TIMEZONE: *timezone +PHONE_VALID_SENSED_BINS: + COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features + BIN_SIZE: 5 # (in minutes) + # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS. 
+ # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory. + TABLES: [] + +PHONE_VALID_SENSED_DAYS: + COMPUTE: False + MIN_VALID_HOURS_PER_DAY: 16 # (out of 24 hours) + MIN_VALID_BINS_PER_HOUR: 6 # (out of 60min/BIN_SIZE bins) + # Communication SMS features config, TYPES and FEATURES keys need to match MESSAGES: COMPUTE: False @@ -58,11 +66,6 @@ APPLICATION_GENRES: UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway -PHONE_VALID_SENSED_DAYS: - BIN_SIZE: 5 # (in minutes) - MIN_VALID_HOURS: 20 # (out of 24) - MIN_BINS_PER_HOUR: 8 # (out of 60min/BIN_SIZE bins) - RESAMPLE_FUSED_LOCATION: CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst index 1b8d2a79..7e653b30 100644 --- a/docs/features/extracted.rst +++ b/docs/features/extracted.rst @@ -10,9 +10,8 @@ RAPIDS Features COMPUTE: True DB_TABLE: messages ... - - - If you want to extract phone_valid_sensed_days.csv, screen features or locaton features based on fused location data don't forget to configure ``TABLES_FOR_SENSED_BINS`` (see below). + +If you want to extract phone_valid_sensed_days.csv, screen features or location features based on fused location data don't forget to configure ``[PHONE_VALID_SENSED_BINS][TABLES]`` (see below). .. 
_global-sensor-doc: @@ -21,11 +20,9 @@ Global Parameters .. _sensor-list: -- ``TABLES_FOR_SENSED_BINS`` - Add as many sensor tables as you have in your database. All sensors included are used to compute ``phone_sensed_bins.csv`` (bins of time when the smartphone was sensing data). In turn, these bins are used to compute ``PHONE_VALID_SENSED_DAYS`` (see below), ``episodepersensedminutes`` feature of :ref:`Screen` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. See TABLES_FOR_SENSED_BINS_ variable in ``config`` file (therefore, when you are extracting screen or Barnett's location features, screen and locations tables are mandatory). - .. _pid: -- ``PID`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details`). +- ``PIDS`` - The list of participant ids to be included in the analysis. These should match the names of the files created in the ``data/external`` directory (:ref:`see more details`). .. _day-segments: @@ -52,19 +49,24 @@ Global Parameters - ``FIXED_TIMEZONE``. See ``TIMEZONE`` above. This assumes that all data of all participants was collected within one time zone. - Support for multiple time zones for each participant coming soon based on the ``timezone`` table collected by Aware. +- ``PHONE_VALID_SENSED_BINS`` + Contains three attributes: ``COMPUTE``, ``BIN_SIZE`` and ``TABLES``. See the PHONE_VALID_SENSED_BINS_ section in the ``config.yaml`` file + + Set the ``COMPUTE`` flag to True if you want to get this file (``data/interim/{pid}/phone_sensed_bins``). Phone valid sensed bins is a matrix of days x bins where we divide every hour of every day into N bins of size ``BIN_SIZE`` (in minutes). Each bin contains the number of rows that were recorded in that interval by all the sensors listed in ``TABLES``. 
Add as many sensor tables to ``TABLES`` as you have in your database because valid sensed bins are used to compute ``PHONE_VALID_SENSED_DAYS`` :ref:`PHONE_VALID_SENSED_BINS`, ``episodepersensedminutes`` feature of :ref:`Screen` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. + + The ``COMPUTE`` flag is automatically ignored (set internally to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features. + .. _phone-valid-sensed-days: - ``PHONE_VALID_SENSED_DAYS``. - Contains three attributes: ``BIN_SIZE``, ``MIN_VALID_HOURS``, ``MIN_BINS_PER_HOUR``. + Contains three attributes: ``COMPUTE``, ``MIN_VALID_HOURS_PER_DAY``, ``MIN_VALID_BINS_PER_HOUR``. See the PHONE_VALID_SENSED_DAYS_ section in ``config.yaml``. - On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data (for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded. + On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data, for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded. - Therefore, we define a valid hour as those that contain a minimum number of valid bins. In turn, a valid bin are those that contain at least one row of data from any sensor logged within that period. We divide an hour into N bins of size ``BIN_SIZE`` (in minutes) and we mark an hour as valid if contains at least ``MIN_BINS_PER_HOUR`` (out of the total possible number of bins that can be captured in an hour based on their length i.e. 
60min/``BIN_SIZE`` bins). Days with valid sensed hours less than ``MIN_VALID_HOURS`` will be excluded form the output of this file. See PHONE_VALID_SENSED_DAYS_ in ``config.yaml``. + Therefore, we define a valid hour as one that contains a minimum number of valid bins. A valid bin is one that contains at least one row of data from any sensor logged within that period (See ``PHONE_VALID_SENSED_BINS`` above). We mark an hour as valid if it contains at least ``MIN_VALID_BINS_PER_HOUR`` (out of the total possible number of bins that can be captured in an hour based on their length i.e. 60min/``BIN_SIZE`` bins). In turn, we mark a day as valid if it has at least ``MIN_VALID_HOURS_PER_DAY`` valid hours. - Note that RAPIDS *DOES NOT* filter your feature files automatically, you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``. - - You can get access to every phone's sensed bins matrix (days x bins) in ``data/interim/{pid}/phone_sensed_bins.csv``. As mentioned above, RAPIDS uses this file to compute ``phone_valid_sensed_days.csv``, ``episodepersensedminutes`` feature of :ref:`Screen` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. + Note that RAPIDS *DOES NOT* filter your feature files automatically, you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``. .. _individual-sensor-settings: @@ -969,31 +971,31 @@ Active and sedentary bouts. If the step count per minute is smaller than ``THRES .. -------------------------Links ------------------------------------ .. -.. _TABLES_FOR_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L3 -.. _`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L35 +.. _PHONE_VALID_SENSED_BINS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L30 +.. 
_`Messages Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L43 .. _AWARE: https://awareframework.com/what-is-aware/ .. _`List of Timezones`: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones -.. _DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L10 -.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L61 -.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L45 -.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L169 -.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L81 -.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L115 -.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L125 -.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L95 -.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L87 -.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L109 -.. _`Location (Barnett’s) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L71 -.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L101 -.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L162 +.. 
_DAY_SEGMENTS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L6 +.. _PHONE_VALID_SENSED_DAYS: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L37 +.. _`Call Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L53 +.. _`WiFi Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L172 +.. _`Bluetooth Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L84 +.. _`Accelerometer Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L118 +.. _`Applications Foreground Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L128 +.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L98 +.. _`Activity Recognition Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L90 +.. _`Light Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L112 +.. _`Location (Barnett’s) Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L74 +.. _`Screen Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L104 +.. _`Fitbit: Sleep Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L165 .. _`version 1`: https://dev.fitbit.com/build/reference/web-api/sleep-v1/ .. _`version 1.2`: https://dev.fitbit.com/build/reference/web-api/sleep/ .. _`Conversation Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L191 .. 
_`this Fitbit forum post`: https://community.fitbit.com/t5/Alta/What-does-Restless-mean-in-sleep-tracking/td-p/2989011 .. _shortData: https://dev.fitbit.com/build/reference/web-api/sleep/#interpreting-the-sleep-stage-and-short-data -.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L138 -.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L145 +.. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/0c53fd275e628819cf79cf5b87006ce1ad9e597c/config.yaml#L141 +.. _`Fitbit: Steps Config Code`: https://github.com/carissalow/rapids/blob/29b04b0601b62379fbdb76de685f3328b8dde2a2/config.yaml#L148 .. _`Fitbit documentation`: https://help.fitbit.com/articles/en_US/Help_article/1565 -.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L108 +.. _top1global: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L136 .. _`Beiwe Summary Statistics`: http://wiki.beiwe.org/wiki/Summary_Statistics .. _`Pause-Flight Model`: https://academic.oup.com/biostatistics/advance-article/doi/10.1093/biostatistics/kxy059/5145908 diff --git a/docs/usage/faq.rst b/docs/usage/faq.rst index 44a28a51..30920b44 100644 --- a/docs/usage/faq.rst +++ b/docs/usage/faq.rst @@ -50,7 +50,7 @@ This is expected behavior. The advantage of using ``snakemake`` under the hood i Execution halted **Solution:** -Please make sure the sensors listed in ``TABLES_FOR_SENSED_BINS`` and each sensor section you activated in ``config.yaml`` match your database tables. +Please make sure the sensors listed in ``[PHONE_VALID_SENSED_BINS][TABLES]`` and each sensor section you activated in ``config.yaml`` match your database tables. 
diff --git a/docs/usage/snakemake_docs.rst b/docs/usage/snakemake_docs.rst index 8cd2e098..6051bf10 100644 --- a/docs/usage/snakemake_docs.rst +++ b/docs/usage/snakemake_docs.rst @@ -10,7 +10,7 @@ The ``config.yaml`` File RAPIDS configuration settings are defined in ``config.yaml`` (See `config.yaml`_). This is the only file that you need to understand in order to compute the features that RAPIDS ships with. -It has global settings like ``TABLES_FOR_SENSED_BINS``, ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information). As well as per sensor settings, for example, for the :ref:`messages-sensor-doc`:: +It has global settings like ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information). As well as per sensor settings, for example, for the :ref:`messages-sensor-doc`:: | ``MESSAGES:`` | ``COMPUTE: True`` diff --git a/rules/features.snakefile b/rules/features.snakefile index dd29fe9e..741802cc 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -157,7 +157,7 @@ rule screen_features: reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"], features_deltas = config["SCREEN"]["FEATURES_DELTAS"], episode_types = config["SCREEN"]["EPISODE_TYPES"], - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] output: "data/processed/{pid}/screen_{day_segment}.csv" script: diff --git a/rules/preprocessing.snakefile b/rules/preprocessing.snakefile index 0fd3ff6f..2f94156f 100644 --- a/rules/preprocessing.snakefile +++ b/rules/preprocessing.snakefile @@ -37,27 +37,27 @@ rule readable_datetime: script: "../src/data/readable_datetime.R" +rule phone_sensed_bins: + input: + all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["PHONE_VALID_SENSED_BINS"]["TABLES"]) + params: + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] + output: + "data/interim/{pid}/phone_sensed_bins.csv" + 
script: + "../src/data/phone_sensed_bins.R" + rule phone_valid_sensed_days: input: - all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"]) + phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv" params: - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"], - min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"], - min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"] + min_valid_hours_per_day = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"], + min_valid_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"] output: "data/interim/{pid}/phone_valid_sensed_days.csv" script: "../src/data/phone_valid_sensed_days.R" -rule phone_sensed_bins: - input: - all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"]) - params: - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] - output: - "data/interim/{pid}/phone_sensed_bins.csv" - script: - "../src/data/phone_sensed_bins.R" rule unify_ios_android: input: @@ -76,7 +76,7 @@ rule resample_fused_location: locations = "data/raw/{pid}/{sensor}_raw.csv", phone_sensed_bins = rules.phone_sensed_bins.output params: - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"], + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"], timezone = config["RESAMPLE_FUSED_LOCATION"]["TIMEZONE"], consecutive_threshold = config["RESAMPLE_FUSED_LOCATION"]["CONSECUTIVE_THRESHOLD"], time_since_valid_location = config["RESAMPLE_FUSED_LOCATION"]["TIME_SINCE_VALID_LOCATION"] diff --git a/rules/reports.snakefile b/rules/reports.snakefile index e9c0d0f7..871dd2c6 100644 --- a/rules/reports.snakefile +++ b/rules/reports.snakefile @@ -5,7 +5,7 @@ rule heatmap_rows: params: table = "{sensor}", pid = "{pid}", - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] output: 
"reports/figures/{pid}/{sensor}_heatmap_rows.html" script: @@ -17,7 +17,7 @@ rule compliance_heatmap: pid_file = "data/external/{pid}" params: pid = "{pid}", - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] output: "reports/figures/{pid}/compliance_heatmap.html" script: @@ -30,8 +30,8 @@ rule overall_compliance_heatmap: pid_files = expand("data/external/{pid}", pid=config["PIDS"]) params: local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"], - bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"], - min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_BINS_PER_HOUR"] + bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"], + min_bins_per_hour = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"] output: "reports/figures/overall_compliance_heatmap.html" script: diff --git a/src/data/phone_valid_sensed_days.R b/src/data/phone_valid_sensed_days.R index 8488af76..9b13aaad 100644 --- a/src/data/phone_valid_sensed_days.R +++ b/src/data/phone_valid_sensed_days.R @@ -1,31 +1,20 @@ source("renv/activate.R") +library("dplyr") +library("tidyr") -library(dplyr) - -all_sensors <- snakemake@input[["all_sensors"]] -bin_size <- snakemake@params[["bin_size"]] -min_valid_hours <- snakemake@params[["min_valid_hours"]] -min_bins_per_hour <- snakemake@params[["min_bins_per_hour"]] +phone_sensed_bins <- read.csv(snakemake@input[["phone_sensed_bins"]]) +min_valid_hours_per_day <- snakemake@params[["min_valid_hours_per_day"]] +min_valid_bins_per_hour <- snakemake@params[["min_valid_bins_per_hour"]] output_file <- snakemake@output[[1]] -# Load all sensors and extract timestamps -all_sensor_data <- data.frame(timestamp = c()) -for(sensor in all_sensors){ - sensor_data <- read.csv(sensor, stringsAsFactors = F) %>% select(local_date, local_hour, local_minute) - all_sensor_data <- rbind(all_sensor_data, sensor_data) -} - -phone_valid_sensed_days <- all_sensor_data %>% - mutate(bin = 
(local_minute %/% bin_size) * bin_size) %>% # bin rows into bin_size-minute bins - group_by(local_date, local_hour, bin) %>% - summarise(minute_period = first(bin)) %>% #filter repeated bins (if rows were logged within bin_size minutes) - ungroup() %>% - group_by(local_date, local_hour) %>% - summarise(bins = n()) %>% # Count how many bins there are per hour - ungroup() %>% - filter(bins >= min_bins_per_hour) %>% # Discard those hours where there were fewer than min_bins_per_hour - group_by(local_date) %>% - summarise(valid_hours = n()) %>% # Count how many valid hours each day has - filter(valid_hours >= min_valid_hours) # Discard those days where there were fewer than min_valid_hours +phone_valid_sensed_days <- phone_sensed_bins %>% + pivot_longer(cols = -local_date, names_to = c("hour", "bin"), names_sep = "_") %>% + filter(value > 0) %>% + group_by(local_date, hour) %>% + summarise(valid_bins = n()) %>% + filter(valid_bins >= min_valid_bins_per_hour) %>% + group_by(local_date) %>% + summarise(valid_sensed_hours = n()) %>% + mutate(is_valid_sensed_day = ifelse(valid_sensed_hours >= min_valid_hours_per_day, TRUE, FALSE)) write.csv(phone_valid_sensed_days, output_file, row.names = FALSE) diff --git a/tests/Snakefile b/tests/Snakefile index d9757039..c96216c2 100644 --- a/tests/Snakefile +++ b/tests/Snakefile @@ -23,8 +23,8 @@ if config["CALLS"]["COMPUTE"]: files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"])) if config["SCREEN"]["COMPUTE"]: - if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]: - raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. 
This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)") + if config["SCREEN"]["DB_TABLE"] not in config["PHONE_VALID_SENSED_BINS"]["TABLES"]: + raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)") files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"])) files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"])) files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"])) diff --git a/tests/settings/testing_config.yaml b/tests/settings/testing_config.yaml index 27449c09..1f6057f7 100644 --- a/tests/settings/testing_config.yaml +++ b/tests/settings/testing_config.yaml @@ -1,7 +1,3 @@ -# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS. -# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory. -TABLES_FOR_SENSED_BINS: [messages, calls, screen, battery, bluetooth, wifi] - # Participants to include in the analysis # You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically PIDS: [test01, test02, test03, test04] @@ -11,6 +7,9 @@ PIDS: [test01, test02, test03, test04] DAY_SEGMENTS: &day_segments [daily, morning, afternoon, evening, night] +PHONE_VALID_SENSED_BINS: + TABLES: [messages, calls, screen, battery, bluetooth, wifi] + # Communication SMS features config, TYPES and FEATURES keys need to match MESSAGES: COMPUTE: True