Compare commits

..

No commits in common. "4485c4c95e6eddad00fb6b5221d2946930394970" and "b99a3c19edfc556765a64da642ce828033dacfcb" have entirely different histories.

9 changed files with 123 additions and 152 deletions

View File

@ -7,32 +7,32 @@ PIDS: [nokia_0000003]
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
CREATE_PARTICIPANT_FILES:
USERNAMES_CSV: "data/external/main_study_usernames.csv"
CSV_FILE_PATH: "data/external/main_study_participants.csv" # see docs for required format
USERNAMES_CSV: "data/external/example_usernames.csv"
CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
PHONE_SECTION:
ADD: True
IGNORED_DEVICE_IDS: []
FITBIT_SECTION:
ADD: False
ADD: True
IGNORED_DEVICE_IDS: []
EMPATICA_SECTION:
ADD: False
ADD: True
IGNORED_DEVICE_IDS: []
# See https://www.rapids.science/latest/setup/configuration/#time-segments
TIME_SEGMENTS: &time_segments
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
FILE: "data/external/timesegments_daily.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
FILE: "data/external/timesegments_periodic.csv"
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
TIMEZONE:
TYPE: MULTIPLE
TYPE: SINGLE
SINGLE:
TZCODE: Europe/Ljubljana
MULTIPLE:
TZCODES_FILE: data/external/multiple_timezones.csv
IF_MISSING_TZCODE: USE_DEFAULT
TZCODES_FILE: data/external/multiple_timezones_example.csv
IF_MISSING_TZCODE: STOP
DEFAULT_TZCODE: Europe/Ljubljana
FITBIT:
ALLOW_MULTIPLE_TZ_PER_DEVICE: False
@ -66,12 +66,12 @@ PHONE_ACCELEROMETER:
CONTAINER: accelerometer
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_SCRIPT: src/features/phone_accelerometer/rapids/main.py
PANDA:
COMPUTE: True
COMPUTE: False
VALID_SENSED_MINUTES: False
FEATURES:
exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
@ -81,12 +81,12 @@ PHONE_ACCELEROMETER:
# See https://www.rapids.science/latest/features/phone-activity-recognition/
PHONE_ACTIVITY_RECOGNITION:
CONTAINER:
ANDROID: google_ar
ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition
EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
ACTIVITY_CLASSES:
STATIONARY: ["still", "tilting"]
@ -106,7 +106,7 @@ PHONE_APPLICATIONS_CRASHES:
# See https://www.rapids.science/latest/features/phone-applications-foreground/
PHONE_APPLICATIONS_FOREGROUND:
CONTAINER: applications
CONTAINER: applications_foreground
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
@ -114,8 +114,8 @@ PHONE_APPLICATIONS_FOREGROUND:
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS:
RAPIDS:
COMPUTE: True
INCLUDE_EPISODE_FEATURES: True
COMPUTE: False
INCLUDE_EPISODE_FEATURES: False
SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"]
@ -125,7 +125,7 @@ PHONE_APPLICATIONS_FOREGROUND:
dating: ["com.tinder", "com.relance.happycouple", "com.kiwi.joyride"]
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
EXCLUDED_CATEGORIES: []
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] # TODO list system apps?
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
FEATURES:
APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"]
@ -135,7 +135,7 @@ PHONE_APPLICATIONS_FOREGROUND:
# See https://www.rapids.science/latest/features/phone-applications-notifications/
PHONE_APPLICATIONS_NOTIFICATIONS:
CONTAINER: notifications
CONTAINER: applications_notifications
APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
@ -149,7 +149,7 @@ PHONE_BATTERY:
EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SRC_SCRIPT: src/features/phone_battery/rapids/main.py
@ -158,12 +158,12 @@ PHONE_BLUETOOTH:
CONTAINER: bluetooth
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R
DORYAB:
COMPUTE: True
COMPUTE: False
FEATURES:
ALL:
DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
@ -194,7 +194,7 @@ PHONE_CALLS:
SRC_SCRIPT: src/features/phone_calls/rapids/main.R
# See https://www.rapids.science/latest/features/phone-conversation/
PHONE_CONVERSATION: # TODO Adapt for speech
PHONE_CONVERSATION:
CONTAINER:
ANDROID: plugin_studentlife_audio_android
IOS: plugin_studentlife_audio
@ -213,21 +213,10 @@ PHONE_CONVERSATION: # TODO Adapt for speech
# See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD:
SENSORS: [PHONE_ACCELEROMETER,
PHONE_ACTIVITY_RECOGNITION,
PHONE_APPLICATIONS_FOREGROUND,
PHONE_APPLICATIONS_NOTIFICATIONS,
PHONE_BATTERY,
PHONE_BLUETOOTH,
PHONE_CALLS,
PHONE_LIGHT,
PHONE_LOCATIONS,
PHONE_MESSAGES,
PHONE_SCREEN,
PHONE_WIFI_VISIBLE]
SENSORS: []
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R
@ -243,10 +232,10 @@ PHONE_KEYBOARD:
# See https://www.rapids.science/latest/features/phone-light/
PHONE_LIGHT:
CONTAINER: light_sensor
CONTAINER: light
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
SRC_SCRIPT: src/features/phone_light/rapids/main.py
@ -260,7 +249,7 @@ PHONE_LOCATIONS:
PROVIDERS:
DORYAB:
COMPUTE: True
COMPUTE: False
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5
@ -275,7 +264,7 @@ PHONE_LOCATIONS:
SRC_SCRIPT: src/features/phone_locations/doryab/main.py
BARNETT:
COMPUTE: True
COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes: it reports how many minutes of data (location coordinates grouped by minute) were used to compute features
@ -290,10 +279,10 @@ PHONE_LOG:
# See https://www.rapids.science/latest/features/phone-messages/
PHONE_MESSAGES:
CONTAINER: sms
CONTAINER: messages
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
MESSAGES_TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
@ -305,7 +294,7 @@ PHONE_SCREEN:
CONTAINER: screen
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
REFERENCE_HOUR_FIRST_USE: 0
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable
@ -327,7 +316,7 @@ PHONE_WIFI_VISIBLE:
CONTAINER: wifi
PROVIDERS:
RAPIDS:
COMPUTE: True
COMPUTE: False
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R
@ -554,16 +543,16 @@ EMPATICA_TAGS:
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#1-histograms-of-phone-data-yield
HISTOGRAM_PHONE_DATA_YIELD:
PLOT: True
PLOT: False
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield
HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT:
PLOT: True
PLOT: False
TIME: RELATIVE_TIME # ABSOLUTE_TIME or RELATIVE_TIME
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors
HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT:
PLOT: True
PLOT: False
# See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count
HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
@ -574,7 +563,7 @@ HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT:
# See https://www.rapids.science/latest/visualizations/feature-visualizations/#1-heatmap-correlation-matrix
HEATMAP_FEATURE_CORRELATION_MATRIX:
PLOT: True
PLOT: False
MIN_ROWS_RATIO: 0.5
CORR_THRESHOLD: 0.1
CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}

View File

@ -1,2 +0,0 @@
label,start_time,length,repeats_on,repeats_value
daily,03:59:59,23H 59M 59S,every_day,0
1 label start_time length repeats_on repeats_value
2 daily 03:59:59 23H 59M 59S every_day 0

View File

@ -1,116 +1,112 @@
name: rapids
name: rapids202108
channels:
- conda-forge
- defaults
dependencies:
- _py-xgboost-mutex=2.0
- appdirs=1.4.*
- appdirs=1.4.4
- arrow=0.16.0
- asn1crypto=1.4.*
- astropy=4.2.*
- attrs=20.3.*
- binaryornot=0.4.*
- asn1crypto=1.4.0
- astropy=4.2.1
- attrs=20.3.0
- binaryornot=0.4.4
- blas=1.0
- brotlipy=0.7.*
- bzip2=1.0.*
- ca-certificates
- certifi
- brotlipy=0.7.0
- bzip2=1.0.8
- ca-certificates=2021.7.5
- certifi=2021.5.30
- cffi=1.14.4
- chardet=3.0.*
- click=7.1.*
- cookiecutter=1.6.*
- cryptography=3.3.*
- datrie=0.8.*
- chardet=3.0.4
- click=7.1.2
- cookiecutter=1.6.0
- cryptography=3.3.1
- datrie=0.8.2
- docutils=0.16
- future=0.18.2
- gitdb=4.0.*
- gitdb2=4.0.*
- gitpython=3.1.*
- gitdb=4.0.5
- gitdb2=4.0.2
- gitpython=3.1.11
- idna=2.10
- imbalanced-learn=0.6.*
- importlib-metadata=2.0.*
- importlib_metadata=2.0.*
- imbalanced-learn=0.6.2
- importlib-metadata=2.0.0
- importlib_metadata=2.0.0
- intel-openmp=2019.4
- jinja2=2.11.2
- jinja2-time=0.2.*
- joblib=1.0.*
- jsonschema=3.2.*
- libblas=3.8.*
- libcblas=3.8.*
- libcxx=10.0.*
- libedit=3.1.*
- jinja2-time=0.2.0
- joblib=1.0.0
- jsonschema=3.2.0
- libblas=3.8.0
- libcblas=3.8.0
- libcxx=10.0.0
- libedit=3.1.20191231
- libffi=3.3
- libgfortran
- liblapack=3.8.*
- libopenblas=0.3.*
- liblapack=3.8.0
- libopenblas=0.3.10
- libxgboost=0.90
- lightgbm=3.1.*
- llvm-openmp=10.0.*
- markupsafe=1.1.*
- lightgbm=3.1.1
- llvm-openmp=10.0.0
- markupsafe=1.1.1
- mkl
- mkl-service=2.3.*
- mkl_fft=1.2.*
- mkl_random=1.1.*
- more-itertools=8.6.*
- mkl-service=2.3.0
- mkl_fft=1.2.0
- mkl_random=1.1.1
- more-itertools=8.6.0
- ncurses=6.2
- numpy=1.19.2
- numpy-base=1.19.2
- openblas=0.3.*
- openssl
- pandas=1.1.*
- pbr=5.5.*
- pip=20.3.*
- openblas=0.3.4
- openssl=1.1.1k
- pandas=1.1.5
- pbr=5.5.1
- pip=20.3.3
- plotly=4.14.1
- poyo=0.5.*
- psutil=5.7.*
- psycopg2
- poyo=0.5.0
- psutil=5.7.2
- py-xgboost=0.90
- pycparser=2.20
- pyerfa=1.7.*
- pyopenssl=20.0.*
- pyprojroot
- pysocks=1.7.*
- python=3.7.*
- python-dateutil=2.8.*
- python-dotenv
- pyerfa=1.7.1.1
- pyopenssl=20.0.1
- pysocks=1.7.1
- python=3.7.9
- python-dateutil=2.8.1
- python_abi=3.7
- pytz=2020.4
- pyyaml=5.3.*
- pyyaml=5.3.1
- readline=8.0
- requests=2.25.0
- retrying=1.3.*
- retrying=1.3.3
- scikit-learn=0.23.2
- scipy=1.5.*
- setuptools=51.0.*
- scipy=1.5.2
- setuptools=51.0.0
- six=1.15.0
- smmap=3.0.*
- smmap2=3.0.*
- sqlalchemy
- smmap=3.0.4
- smmap2=3.0.1
- sqlite=3.33.0
- threadpoolctl=2.1.*
- tk=8.6.*
- threadpoolctl=2.1.0
- tk=8.6.10
- tqdm=4.62.0
- urllib3=1.25.11
- wheel=0.36.2
- whichcraft=0.6.*
- whichcraft=0.6.1
- wrapt=1.12.1
- xgboost=0.90
- xz=5.2.*
- yaml=0.2.*
- zipp=3.4.*
- zlib=1.2.*
- xz=5.2.5
- yaml=0.2.5
- zipp=3.4.0
- zlib=1.2.11
- pip:
- amply==0.1.*
- amply==0.1.4
- configargparse==0.15.1
- decorator==4.4.*
- ipython-genutils==0.2.*
- jupyter-core==4.6.*
- nbformat==5.0.*
- decorator==4.4.2
- ipython-genutils==0.2.0
- jupyter-core==4.6.3
- nbformat==5.0.7
- pulp==2.4
- pyparsing==2.4.*
- pyparsing==2.4.7
- pyrsistent==0.15.5
- ratelimiter==1.2.*
- ratelimiter==1.2.0.post0
- snakemake==5.30.2
- toposort==1.5
- traitlets==4.3.*
- traitlets==4.3.3
prefix: /usr/local/Caskroom/miniconda/base/envs/rapids202108

View File

@ -4,23 +4,6 @@ rule create_example_participant_files:
shell:
"echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml"
rule query_usernames_device_empatica_ids:
params:
baseline_folder = "/mnt/e/STRAWbaseline/"
output:
usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
timezone_file = "data/external/timezone.csv"
script:
"../../participants/prepare_usernames_file.py"
rule prepare_tzcodes_file:
input:
timezone_file = "data/external/timezone.csv"
output:
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
script:
"../tools/create_multi_timezones_file.py"
rule prepare_participants_csv:
input:
username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"]

View File

@ -1,6 +1,6 @@
source("renv/activate.R")
#library(RMariaDB)
library(RMariaDB)
library(stringr)
library(purrr)
library(readr)

View File

@ -121,7 +121,7 @@ pull_participants_ids <- function(stream_parameters, usernames, participants_con
filter(username %in% usernames) %>%
select(username, id)
message(paste0("Executing the following query to get participants' IDs: \n", sql_render(query_participant_id)))
message(paste0("Executing the following query to get the participant's id: \n", sql_render(query_participant_id)))
participant_data <- query_participant_id %>% collect()
@ -149,13 +149,7 @@ pull_participants_device_ids <- function(stream_parameters, participants_ids, de
group_by(participant_id) %>%
distinct(device_id, .keep_all = FALSE)
message(
paste0(
"Executing the following query to get the distinct device IDs: \n",
sql_render(query_device_id),
"\n NOTE: This might take a long time."
)
)
message(paste0("Executing the following query to get the distinct device IDs: \n", sql_render(query_device_id)))
device_ids <- query_device_id %>% collect()

View File

@ -66,7 +66,8 @@ PHONE_APPLICATIONS_FOREGROUND:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
PACKAGE_NAME: package_hash
PACKAGE_NAME: package_name
APPLICATION_NAME: application_name
IS_SYSTEM_APP: is_system_app
MUTATION:
COLUMN_MAPPINGS:
@ -77,9 +78,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: timestamp
DEVICE_ID: device_id
PACKAGE_NAME: package_hash
PACKAGE_NAME: package_name
APPLICATION_NAME: application_name
TEXT: text
SOUND: sound
VIBRATE: vibrate
DEFAULTS: defaults
FLAGS: flags
MUTATION:
COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data

View File

@ -27,14 +27,19 @@ PHONE_APPLICATIONS_FOREGROUND:
- TIMESTAMP
- DEVICE_ID
- PACKAGE_NAME
- APPLICATION_NAME
- IS_SYSTEM_APP
PHONE_APPLICATIONS_NOTIFICATIONS:
- TIMESTAMP
- DEVICE_ID
- PACKAGE_NAME
- APPLICATION_NAME
- TEXT
- SOUND
- VIBRATE
- DEFAULTS
- FLAGS
PHONE_BATTERY:
- TIMESTAMP

View File

@ -21,7 +21,7 @@ prepare_participants_file <- function() {
pid_format <- "p%03d"
datetime_format <- "%Y-%m-%d %H:%M:%S"
participant_data <- read_csv(username_list_csv_location, col_types = "cc", progress = FALSE)
participant_data <- read_csv(username_list_csv_location, col_types = "c", progress = FALSE)
usernames <- participant_data$label
participant_ids <- pull_participants_ids(data_configuration, usernames, participants_container)
@ -45,6 +45,7 @@ prepare_participants_file <- function() {
pid = sprintf(pid_format, participant_id),
start_date = strftime(datetime_start, format=datetime_format, tz = "UTC", usetz = FALSE), #TODO Check what timezone is expected
end_date = strftime(datetime_end, format=datetime_format, tz = "UTC", usetz = FALSE),
empatica_id = "placeholder", #TODO Provide in file?
device_id = map_chr(device_ids, str_c, collapse = ";"),
number_of_devices = map_int(device_ids, length),
fitbit_id = ""