Compare commits
66 Commits
Author | SHA1 | Date |
---|---|---|
junos | bb1e1900a4 | |
junos | c35c75682f | |
Primoz | 936324d234 | |
Primoz | da0a4596f8 | |
Primoz | d4d74818e6 | |
Primoz | 14ff59914b | |
Primoz | 6ab0ac5329 | |
Primoz | b92a3aa37a | |
Primoz | bfd637eb9c | |
Primoz | 0d81ad5756 | |
Primoz | cea451d344 | |
Primoz | e88bbd548f | |
Primoz | cf38d9f175 | |
Primoz | f3ca56cdbf | |
Primoz | 797aa98f4f | |
Primoz | 9baff159cd | |
Primoz | 0f21273508 | |
Primoz | 55517eb737 | |
Primoz | de15a52dba | |
Primoz | 1ad25bb572 | |
Primoz | 9884b383cf | |
Primoz | 2dc89c083c | |
Primoz | 001d400729 | |
Primoz | 1e38d9bf1e | |
Primoz | a34412a18d | |
Primoz | 437459648f | |
Primoz | 53f6cc60d5 | |
Primoz | bbeabeee6f | |
Primoz | 44531c6d94 | |
Primoz | 7ac7cd5a37 | |
Primoz | 68fd69dada | |
Primoz | a4f0d056a0 | |
Primoz | 6286e7a44c | |
Primoz | 9b3447febd | |
Primoz | d6adda30cf | |
Primoz | 8af4ef11dc | |
Primoz | 536b9494cd | |
Primoz | f0b87c9dd0 | |
Primoz | 7fcdb873fe | |
Primoz | 5c7bb0f4c1 | |
Primoz | bd53dc1684 | |
Primoz | d9a574c550 | |
Primoz | 19aa8707c0 | |
Primoz | 247d758cb7 | |
Primoz | 90ee99e4b9 | |
Primoz | 7493aaa643 | |
Primoz | eaf4340afd | |
Primoz | a96ea508c6 | |
Primoz | 52e11cdcab | |
Primoz | 92aff93e65 | |
Primoz | 18b63127de | |
Primoz | 62982866cd | |
Primoz | 0ce6da5444 | |
Primoz | e3b78c8a85 | |
Primoz | 7d85f75d21 | |
Primoz | 385e21409d | |
Primoz | 18002f59e1 | |
Primoz | 3cf7ca41aa | |
Primoz | d5ab5a0394 | |
Primoz | dfbb758902 | |
Primoz | 4ec371ed96 | |
Primoz | d27a4a71c8 | |
Primoz | 15d792089d | |
Primoz | cb351e0ff6 | |
Primoz | 86299d346b | |
Primoz | 3f7ec80c18 |
|
@ -100,9 +100,6 @@ data/external/*
|
||||||
!/data/external/wiki_tz.csv
|
!/data/external/wiki_tz.csv
|
||||||
!/data/external/main_study_usernames.csv
|
!/data/external/main_study_usernames.csv
|
||||||
!/data/external/timezone.csv
|
!/data/external/timezone.csv
|
||||||
!/data/external/play_store_application_genre_catalogue.csv
|
|
||||||
!/data/external/play_store_categories_count.csv
|
|
||||||
|
|
||||||
|
|
||||||
data/raw/*
|
data/raw/*
|
||||||
!/data/raw/.gitkeep
|
!/data/raw/.gitkeep
|
||||||
|
|
126
README.md
126
README.md
|
@ -16,7 +16,7 @@ By [MoSHI](https://www.moshi.pitt.edu/), [University of Pittsburgh](https://www.
|
||||||
|
|
||||||
For RAPIDS installation refer to the [documentation](https://www.rapids.science/1.8/setup/installation/)
|
For RAPIDS installation refer to the [documentation](https://www.rapids.science/1.8/setup/installation/)
|
||||||
|
|
||||||
### For the installation of the Docker version
|
## For the installation of the Docker version
|
||||||
|
|
||||||
1. Follow the [instructions](https://www.rapids.science/1.8/setup/installation/) to setup RAPIDS via Docker (from scratch).
|
1. Follow the [instructions](https://www.rapids.science/1.8/setup/installation/) to setup RAPIDS via Docker (from scratch).
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ Type R to go to the interactive R session and then:
|
||||||
```
|
```
|
||||||
|
|
||||||
6. Install cr-features module
|
6. Install cr-features module
|
||||||
From: https://repo.ijs.si/matjazbostic/calculatingfeatures.git -> branch master.
|
From: https://repo.ijs.si/matjazbostic/calculatingfeatures.git -> branch modifications_for_rapids.
|
||||||
Then follow the "cr-features module" section below.
|
Then follow the "cr-features module" section below.
|
||||||
|
|
||||||
7. Install all required packages from environment.yml, prune also deletes conda packages not present in environment file.
|
7. Install all required packages from environment.yml, prune also deletes conda packages not present in environment file.
|
||||||
|
@ -62,7 +62,7 @@ Then follow the "cr-features module" section below.
|
||||||
conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml
|
conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
### cr-features module
|
## cr-features module
|
||||||
|
|
||||||
This RAPIDS extension uses cr-features library accessible [here](https://repo.ijs.si/matjazbostic/calculatingfeatures).
|
This RAPIDS extension uses cr-features library accessible [here](https://repo.ijs.si/matjazbostic/calculatingfeatures).
|
||||||
|
|
||||||
|
@ -79,123 +79,3 @@ To use cr-features library:
|
||||||
cr-features package has to be built and installed every time to get the newest version.
|
cr-features package has to be built and installed every time to get the newest version.
|
||||||
Or the newest version of the Docker image must be used.
|
Or the newest version of the Docker image must be used.
|
||||||
```
|
```
|
||||||
|
|
||||||
## Updating RAPIDS
|
|
||||||
|
|
||||||
To update RAPIDS, first pull and merge [origin](https://github.com/carissalow/rapids), such as with:
|
|
||||||
|
|
||||||
```commandline
|
|
||||||
git fetch --progress "origin" refs/heads/master
|
|
||||||
git merge --no-ff origin/master
|
|
||||||
```
|
|
||||||
|
|
||||||
Next, update the conda and R virtual environment.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
R -e 'renv::restore(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/focal/latest"))'
|
|
||||||
```
|
|
||||||
|
|
||||||
## Custom configuration
|
|
||||||
### Credentials
|
|
||||||
|
|
||||||
As mentioned under [Database in RAPIDS documentation](https://www.rapids.science/1.6/snippets/database/), a `credentials.yaml` file is needed to connect to a database.
|
|
||||||
It should contain:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
PSQL_STRAW:
|
|
||||||
database: staw
|
|
||||||
host: 212.235.208.113
|
|
||||||
password: password
|
|
||||||
port: 5432
|
|
||||||
user: staw_db
|
|
||||||
```
|
|
||||||
|
|
||||||
where `password` needs to be specified as well.
|
|
||||||
|
|
||||||
## Possible installation issues
|
|
||||||
### Missing dependencies for RPostgres
|
|
||||||
|
|
||||||
To install `RPostgres` R package (used to connect to the PostgreSQL database), an error might occur:
|
|
||||||
|
|
||||||
```text
|
|
||||||
------------------------- ANTICONF ERROR ---------------------------
|
|
||||||
Configuration failed because libpq was not found. Try installing:
|
|
||||||
* deb: libpq-dev (Debian, Ubuntu, etc)
|
|
||||||
* rpm: postgresql-devel (Fedora, EPEL)
|
|
||||||
* rpm: postgreql8-devel, psstgresql92-devel, postgresql93-devel, or postgresql94-devel (Amazon Linux)
|
|
||||||
* csw: postgresql_dev (Solaris)
|
|
||||||
* brew: libpq (OSX)
|
|
||||||
If libpq is already installed, check that either:
|
|
||||||
(i) 'pkg-config' is in your PATH AND PKG_CONFIG_PATH contains a libpq.pc file; or
|
|
||||||
(ii) 'pg_config' is in your PATH.
|
|
||||||
If neither can detect , you can set INCLUDE_DIR
|
|
||||||
and LIB_DIR manually via:
|
|
||||||
R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'
|
|
||||||
--------------------------[ ERROR MESSAGE ]----------------------------
|
|
||||||
<stdin>:1:10: fatal error: libpq-fe.h: No such file or directory
|
|
||||||
compilation terminated.
|
|
||||||
```
|
|
||||||
|
|
||||||
The library requires `libpq` for compiling from source, so install accordingly.
|
|
||||||
|
|
||||||
### Timezone environment variable for tidyverse (relevant for WSL2)
|
|
||||||
|
|
||||||
One of the R packages, `tidyverse` might need access to the `TZ` environment variable during the installation.
|
|
||||||
On Ubuntu 20.04 on WSL2 this triggers the following error:
|
|
||||||
|
|
||||||
```text
|
|
||||||
> install.packages('tidyverse')
|
|
||||||
|
|
||||||
ERROR: configuration failed for package ‘xml2’
|
|
||||||
System has not been booted with systemd as init system (PID 1). Can't operate.
|
|
||||||
Failed to create bus connection: Host is down
|
|
||||||
Warning in system("timedatectl", intern = TRUE) :
|
|
||||||
running command 'timedatectl' had status 1
|
|
||||||
Error in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]) :
|
|
||||||
namespace ‘xml2’ 1.3.1 is already loaded, but >= 1.3.2 is required
|
|
||||||
Calls: <Anonymous> ... namespaceImportFrom -> asNamespace -> loadNamespace
|
|
||||||
Execution halted
|
|
||||||
ERROR: lazy loading failed for package ‘tidyverse’
|
|
||||||
```
|
|
||||||
|
|
||||||
This happens because WSL2 does not use the `timedatectl` service, which provides this variable.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
~$ timedatectl
|
|
||||||
System has not been booted with systemd as init system (PID 1). Can't operate.
|
|
||||||
Failed to create bus connection: Host is down
|
|
||||||
```
|
|
||||||
|
|
||||||
and later
|
|
||||||
|
|
||||||
```bash
|
|
||||||
Warning message:
|
|
||||||
In system("timedatectl", intern = TRUE) :
|
|
||||||
running command 'timedatectl' had status 1
|
|
||||||
Execution halted
|
|
||||||
```
|
|
||||||
|
|
||||||
This can be amended by setting the environment variable manually before attempting to install `tidyverse`:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export TZ='Europe/Ljubljana'
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: if this is needed to avoid runtime issues, you need to either define this environment variable in each new terminal window or (better) define it in your `~/.bashrc` or `~/.bash_profile`.
|
|
||||||
|
|
||||||
## Possible runtime issues
|
|
||||||
### Unix end of line characters
|
|
||||||
|
|
||||||
Upon running rapids, an error might occur:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
/usr/bin/env: ‘python3\r’: No such file or directory
|
|
||||||
```
|
|
||||||
|
|
||||||
This is due to Windows style end of line characters.
|
|
||||||
To amend this, I added a `.gitattributes` file to force `git` to check out `rapids` using Unix EOL characters.
|
|
||||||
If this still fails, `dos2unix` can be used to change them.
|
|
||||||
|
|
||||||
### System has not been booted with systemd as init system (PID 1)
|
|
||||||
|
|
||||||
See [the installation issue above](#Timezone-environment-variable-for-tidyverse-(relevant-for-WSL2)).
|
|
||||||
|
|
|
@ -174,15 +174,6 @@ for provider in config["PHONE_ESM"]["PROVIDERS"].keys():
|
||||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
|
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
|
||||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
for provider in config["PHONE_SPEECH"]["PROVIDERS"].keys():
|
|
||||||
if config["PHONE_SPEECH"]["PROVIDERS"][provider]["COMPUTE"]:
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_speech_raw.csv",pid=config["PIDS"]))
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_speech_with_datetime.csv",pid=config["PIDS"]))
|
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_speech_features/phone_speech_{language}_{provider_key}.csv",pid=config["PIDS"],language=get_script_language(config["PHONE_SPEECH"]["PROVIDERS"][provider]["SRC_SCRIPT"]),provider_key=provider.lower()))
|
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/phone_speech.csv", pid=config["PIDS"]))
|
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
|
||||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
|
||||||
|
|
||||||
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
||||||
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
||||||
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
||||||
|
|
63
config.yaml
63
config.yaml
|
@ -3,7 +3,7 @@
|
||||||
########################################################################################################################
|
########################################################################################################################
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#participant-files
|
||||||
PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
|
PIDS: ['p03'] #['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
# See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
|
||||||
CREATE_PARTICIPANT_FILES:
|
CREATE_PARTICIPANT_FILES:
|
||||||
|
@ -26,9 +26,7 @@ TIME_SEGMENTS: &time_segments
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
|
||||||
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
TAILORED_EVENTS: # Only relevant if TYPE=EVENT
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
SEGMENTING_METHOD: "30_before" # 30_before, 90_before, stress_event
|
PARAMETER_ONE: "something"
|
||||||
INTERVAL_OF_INTEREST: 10 # duration of event of interest [minutes]
|
|
||||||
IOI_ERROR_TOLERANCE: 5 # interval of interest error tolerance (before and after IOI) [minutes]
|
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
# See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
|
||||||
TIMEZONE:
|
TIMEZONE:
|
||||||
|
@ -104,9 +102,9 @@ PHONE_APPLICATIONS_CRASHES:
|
||||||
CONTAINER: applications_crashes
|
CONTAINER: applications_crashes
|
||||||
APPLICATION_CATEGORIES:
|
APPLICATION_CATEGORIES:
|
||||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||||
CATALOGUE_FILE: "data/external/play_store_application_genre_catalogue.csv"
|
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||||
SCRAPE_MISSING_CATEGORIES: False # whether to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||||
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/phone-applications-foreground/
|
# See https://www.rapids.science/latest/features/phone-applications-foreground/
|
||||||
|
@ -114,32 +112,24 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
CONTAINER: applications
|
CONTAINER: applications
|
||||||
APPLICATION_CATEGORIES:
|
APPLICATION_CATEGORIES:
|
||||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||||
CATALOGUE_FILE: "data/external/play_store_application_genre_catalogue.csv"
|
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||||
# Refer to data/external/play_store_categories_count.csv for a list of categories (genres) and their frequency.
|
PACKAGE_NAMES_HASHED: True
|
||||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||||
SCRAPE_MISSING_CATEGORIES: False # whether to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
INCLUDE_EPISODE_FEATURES: True
|
INCLUDE_EPISODE_FEATURES: True
|
||||||
SINGLE_CATEGORIES: ["Productivity", "Tools", "Communication", "Education", "Social"]
|
SINGLE_CATEGORIES: ["all", "email"]
|
||||||
MULTIPLE_CATEGORIES:
|
MULTIPLE_CATEGORIES:
|
||||||
games: ["Puzzle", "Card", "Casual", "Board", "Strategy", "Trivia", "Word", "Adventure", "Role Playing", "Simulation", "Board, Brain Games", "Racing"]
|
social: ["socialnetworks", "socialmediatools"]
|
||||||
social: ["Communication", "Social", "Dating"]
|
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]
|
||||||
productivity: ["Tools", "Productivity", "Finance", "Education", "News & Magazines", "Business", "Books & Reference"]
|
|
||||||
health: ["Health & Fitness", "Lifestyle", "Food & Drink", "Sports", "Medical", "Parenting"]
|
|
||||||
entertainment: ["Shopping", "Music & Audio", "Entertainment", "Travel & Local", "Photography", "Video Players & Editors", "Personalization", "House & Home", "Art & Design", "Auto & Vehicles", "Entertainment,Music & Video",
|
|
||||||
"Puzzle", "Card", "Casual", "Board", "Strategy", "Trivia", "Word", "Adventure", "Role Playing", "Simulation", "Board, Brain Games", "Racing" # Add all games.
|
|
||||||
]
|
|
||||||
maps_weather: ["Maps & Navigation", "Weather"]
|
|
||||||
CUSTOM_CATEGORIES:
|
CUSTOM_CATEGORIES:
|
||||||
SINGLE_APPS: []
|
social_media: ["com.google.android.youtube", "com.snapchat.android", "com.instagram.android", "com.zhiliaoapp.musically", "com.facebook.katana"]
|
||||||
EXCLUDED_CATEGORIES: ["System", "STRAW"]
|
dating: ["com.tinder", "com.relance.happycouple", "com.kiwi.joyride"]
|
||||||
# Note: A special option here is "is_system_app".
|
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
|
||||||
# This excludes applications that have is_system_app = TRUE, which is a separate column in the table.
|
EXCLUDED_CATEGORIES: []
|
||||||
# However, all of these applications have been assigned System category.
|
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] # TODO list system apps?
|
||||||
# I will therefore filter by that category, which is a superset and is more complete. JL
|
|
||||||
EXCLUDED_APPS: []
|
|
||||||
FEATURES:
|
FEATURES:
|
||||||
APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||||
APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"]
|
APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"]
|
||||||
|
@ -251,8 +241,7 @@ PHONE_ESM:
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
STRAW:
|
STRAW:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
SCALES: ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
|
SCALES: ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support"]
|
||||||
"appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
|
|
||||||
FEATURES: [mean]
|
FEATURES: [mean]
|
||||||
SRC_SCRIPT: src/features/phone_esm/straw/main.py
|
SRC_SCRIPT: src/features/phone_esm/straw/main.py
|
||||||
|
|
||||||
|
@ -337,15 +326,6 @@ PHONE_SCREEN:
|
||||||
EPISODE_TYPES: ["unlock"]
|
EPISODE_TYPES: ["unlock"]
|
||||||
SRC_SCRIPT: src/features/phone_screen/rapids/main.py
|
SRC_SCRIPT: src/features/phone_screen/rapids/main.py
|
||||||
|
|
||||||
# Custom added sensor
|
|
||||||
PHONE_SPEECH:
|
|
||||||
CONTAINER: speech
|
|
||||||
PROVIDERS:
|
|
||||||
STRAW:
|
|
||||||
COMPUTE: True
|
|
||||||
FEATURES: ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
|
|
||||||
SRC_SCRIPT: src/features/phone_speech/straw/main.py
|
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/phone-wifi-connected/
|
# See https://www.rapids.science/latest/features/phone-wifi-connected/
|
||||||
PHONE_WIFI_CONNECTED:
|
PHONE_WIFI_CONNECTED:
|
||||||
CONTAINER: sensor_wifi
|
CONTAINER: sensor_wifi
|
||||||
|
@ -730,7 +710,6 @@ ALL_CLEANING_OVERALL:
|
||||||
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5
|
||||||
CORR_THRESHOLD: 0.95
|
CORR_THRESHOLD: 0.95
|
||||||
STANDARDIZATION: True
|
STANDARDIZATION: True
|
||||||
TARGET_STANDARDIZATION: False
|
|
||||||
SRC_SCRIPT: src/features/all_cleaning_overall/straw/main.py
|
SRC_SCRIPT: src/features/all_cleaning_overall/straw/main.py
|
||||||
|
|
||||||
|
|
||||||
|
@ -752,7 +731,5 @@ PARAMS_FOR_ANALYSIS:
|
||||||
|
|
||||||
TARGET:
|
TARGET:
|
||||||
COMPUTE: True
|
COMPUTE: True
|
||||||
LABEL: appraisal_stressfulness_event_mean
|
LABEL: PANAS_negative_affect_mean
|
||||||
ALL_LABELS: [PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean, JCQ_coworker_support_mean, appraisal_stressfulness_period_mean]
|
ALL_LABELS: [PANAS_positive_affect_mean, PANAS_negative_affect_mean, "JCQ_job_demand_mean", "JCQ_job_control_mean", "JCQ_supervisor_support_mean", "JCQ_coworker_support_mean"]
|
||||||
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
|
|
||||||
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,45 +0,0 @@
|
||||||
genre,n
|
|
||||||
System,261
|
|
||||||
Tools,96
|
|
||||||
Productivity,71
|
|
||||||
Health & Fitness,60
|
|
||||||
Finance,54
|
|
||||||
Communication,39
|
|
||||||
Music & Audio,39
|
|
||||||
Shopping,38
|
|
||||||
Lifestyle,33
|
|
||||||
Education,28
|
|
||||||
News & Magazines,24
|
|
||||||
Maps & Navigation,23
|
|
||||||
Entertainment,21
|
|
||||||
Business,18
|
|
||||||
Travel & Local,18
|
|
||||||
Books & Reference,16
|
|
||||||
Social,16
|
|
||||||
Weather,16
|
|
||||||
Food & Drink,14
|
|
||||||
Sports,14
|
|
||||||
Other,13
|
|
||||||
Photography,13
|
|
||||||
Puzzle,13
|
|
||||||
Video Players & Editors,12
|
|
||||||
Card,9
|
|
||||||
Casual,9
|
|
||||||
Personalization,8
|
|
||||||
Medical,7
|
|
||||||
Board,5
|
|
||||||
Strategy,4
|
|
||||||
House & Home,3
|
|
||||||
Trivia,3
|
|
||||||
Word,3
|
|
||||||
Adventure,2
|
|
||||||
Art & Design,2
|
|
||||||
Auto & Vehicles,2
|
|
||||||
Dating,2
|
|
||||||
Role Playing,2
|
|
||||||
STRAW,2
|
|
||||||
Simulation,2
|
|
||||||
"Board,Brain Games",1
|
|
||||||
"Entertainment,Music & Video",1
|
|
||||||
Parenting,1
|
|
||||||
Racing,1
|
|
|
|
@ -1,39 +0,0 @@
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
||||||
# !
|
|
||||||
"""
|
|
||||||
Please do not make any changes, as RAPIDS is running on tmux server ...
|
|
||||||
"""
|
|
||||||
# !
|
|
185
environment.yml
185
environment.yml
|
@ -1,30 +1,165 @@
|
||||||
name: rapids
|
name: rapids
|
||||||
channels:
|
channels:
|
||||||
- conda-forge
|
- conda-forge
|
||||||
|
- defaults
|
||||||
dependencies:
|
dependencies:
|
||||||
- auto-sklearn
|
- _libgcc_mutex=0.1
|
||||||
- hmmlearn
|
- _openmp_mutex=4.5
|
||||||
- imbalanced-learn
|
- _py-xgboost-mutex=2.0
|
||||||
- jsonschema
|
- appdirs=1.4.4
|
||||||
- lightgbm
|
- arrow=0.16.0
|
||||||
- matplotlib
|
- asn1crypto=1.4.0
|
||||||
- numpy
|
- astropy=4.2.1
|
||||||
- pandas
|
- attrs=20.3.0
|
||||||
- peakutils
|
- binaryornot=0.4.4
|
||||||
- pip
|
- blas=1.0
|
||||||
- plotly
|
- brotlipy=0.7.0
|
||||||
- python-dateutil
|
- bzip2=1.0.8
|
||||||
- pytz
|
- ca-certificates=2021.7.5
|
||||||
- pywavelets
|
- certifi=2021.5.30
|
||||||
- pyyaml
|
- cffi=1.14.4
|
||||||
- scikit-learn
|
- chardet=3.0.4
|
||||||
- scipy
|
- click=7.1.2
|
||||||
- seaborn
|
- colorama=0.4.4
|
||||||
- setuptools
|
- cookiecutter=1.6.0
|
||||||
- bioconda::snakemake
|
- cryptography=3.3.1
|
||||||
- bioconda::snakemake-minimal
|
- datrie=0.8.2
|
||||||
- tqdm
|
- docutils=0.16
|
||||||
- xgboost
|
- future=0.18.2
|
||||||
|
- gitdb=4.0.5
|
||||||
|
- gitdb2=4.0.2
|
||||||
|
- gitpython=3.1.11
|
||||||
|
- idna=2.10
|
||||||
|
- imbalanced-learn=0.6.2
|
||||||
|
- importlib-metadata=2.0.0
|
||||||
|
- importlib_metadata=2.0.0
|
||||||
|
- intel-openmp=2019.4
|
||||||
|
- jinja2=2.11.2
|
||||||
|
- jinja2-time=0.2.0
|
||||||
|
- joblib=1.0.0
|
||||||
|
- jsonschema=3.2.0
|
||||||
|
- ld_impl_linux-64=2.36.1
|
||||||
|
- libblas=3.8.0
|
||||||
|
- libcblas=3.8.0
|
||||||
|
- libcxx=10.0.0
|
||||||
|
- libcxxabi=10.0.0
|
||||||
|
- libedit=3.1.20191231
|
||||||
|
- libffi=3.3
|
||||||
|
- libgcc-ng=11.2.0
|
||||||
|
- libgfortran
|
||||||
|
- libgfortran
|
||||||
|
- libgfortran
|
||||||
|
- liblapack=3.8.0
|
||||||
|
- libopenblas=0.3.10
|
||||||
|
- libstdcxx-ng=11.2.0
|
||||||
|
- libxgboost=0.90
|
||||||
|
- libzlib=1.2.11
|
||||||
|
- lightgbm=3.1.1
|
||||||
|
- llvm-openmp=10.0.0
|
||||||
|
- markupsafe=1.1.1
|
||||||
|
- mkl
|
||||||
|
- mkl-service=2.3.0
|
||||||
|
- mkl_fft=1.2.0
|
||||||
|
- mkl_random=1.1.1
|
||||||
|
- more-itertools=8.6.0
|
||||||
|
- ncurses=6.2
|
||||||
|
- numpy=1.19.2
|
||||||
|
- numpy-base=1.19.2
|
||||||
|
- openblas=0.3.4
|
||||||
|
- openssl=1.1.1k
|
||||||
|
- pandas=1.1.5
|
||||||
|
- pbr=5.5.1
|
||||||
|
- pip=20.3.3
|
||||||
|
- plotly=4.14.1
|
||||||
|
- poyo=0.5.0
|
||||||
|
- psutil=5.7.2
|
||||||
|
- py-xgboost=0.90
|
||||||
|
- pycparser=2.20
|
||||||
|
- pyerfa=1.7.1.1
|
||||||
|
- pyopenssl=20.0.1
|
||||||
|
- pysocks=1.7.1
|
||||||
|
- python=3.7.9
|
||||||
|
- python-dateutil=2.8.1
|
||||||
|
- python_abi=3.7
|
||||||
|
- pytz=2020.4
|
||||||
|
- pyyaml=5.3.1
|
||||||
|
- readline=8.0
|
||||||
|
- requests=2.25.0
|
||||||
|
- retrying=1.3.3
|
||||||
|
- setuptools=51.0.0
|
||||||
|
- six=1.15.0
|
||||||
|
- smmap=3.0.4
|
||||||
|
- smmap2=3.0.1
|
||||||
|
- sqlite=3.33.0
|
||||||
|
- threadpoolctl=2.1.0
|
||||||
|
- tk=8.6.10
|
||||||
|
- tqdm=4.62.0
|
||||||
|
- urllib3=1.25.11
|
||||||
|
- wheel=0.36.2
|
||||||
|
- whichcraft=0.6.1
|
||||||
|
- wrapt=1.12.1
|
||||||
|
- xgboost=0.90
|
||||||
|
- xz=5.2.5
|
||||||
|
- yaml=0.2.5
|
||||||
|
- zipp=3.4.0
|
||||||
|
- zlib=1.2.11
|
||||||
- pip:
|
- pip:
|
||||||
- biosppy
|
- amply==0.1.4
|
||||||
- cr_features>=0.2
|
- auto-sklearn==0.14.7
|
||||||
|
- bidict==0.22.0
|
||||||
|
- biosppy==0.8.0
|
||||||
|
- build==0.8.0
|
||||||
|
- cached-property==1.5.2
|
||||||
|
- cloudpickle==2.2.0
|
||||||
|
- configargparse==0.15.1
|
||||||
|
- configspace==0.4.21
|
||||||
|
- cr-features==0.2.1
|
||||||
|
- cycler==0.11.0
|
||||||
|
- cython==0.29.32
|
||||||
|
- dask==2022.2.0
|
||||||
|
- decorator==4.4.2
|
||||||
|
- distributed==2022.2.0
|
||||||
|
- distro==1.7.0
|
||||||
|
- emcee==3.1.2
|
||||||
|
- fonttools==4.33.2
|
||||||
|
- fsspec==2022.8.2
|
||||||
|
- h5py==3.6.0
|
||||||
|
- heapdict==1.0.1
|
||||||
|
- hmmlearn==0.2.7
|
||||||
|
- ipython-genutils==0.2.0
|
||||||
|
- jupyter-core==4.6.3
|
||||||
|
- kiwisolver==1.4.2
|
||||||
|
- liac-arff==2.5.0
|
||||||
|
- locket==1.0.0
|
||||||
|
- matplotlib==3.5.1
|
||||||
|
- msgpack==1.0.4
|
||||||
|
- nbformat==5.0.7
|
||||||
|
- opencv-python==4.5.5.64
|
||||||
|
- packaging==21.3
|
||||||
|
- partd==1.3.0
|
||||||
|
- peakutils==1.3.3
|
||||||
|
- pep517==0.13.0
|
||||||
|
- pillow==9.1.0
|
||||||
|
- pulp==2.4
|
||||||
|
- pynisher==0.6.4
|
||||||
|
- pyparsing==2.4.7
|
||||||
|
- pyrfr==0.8.3
|
||||||
|
- pyrsistent==0.15.5
|
||||||
|
- pywavelets==1.3.0
|
||||||
|
- ratelimiter==1.2.0.post0
|
||||||
|
- scikit-learn==0.24.2
|
||||||
|
- scipy==1.7.3
|
||||||
|
- seaborn==0.11.2
|
||||||
|
- shortuuid==1.0.8
|
||||||
|
- smac==1.2
|
||||||
|
- snakemake==5.30.2
|
||||||
|
- sortedcontainers==2.4.0
|
||||||
|
- tblib==1.7.0
|
||||||
|
- tomli==2.0.1
|
||||||
|
- toolz==0.12.0
|
||||||
|
- toposort==1.5
|
||||||
|
- tornado==6.2
|
||||||
|
- traitlets==4.3.3
|
||||||
|
- typing-extensions==4.2.0
|
||||||
|
- zict==2.2.0
|
||||||
|
prefix: /opt/conda/envs/rapids
|
||||||
|
|
334
renv.lock
334
renv.lock
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"R": {
|
"R": {
|
||||||
"Version": "4.2.3",
|
"Version": "4.1.2",
|
||||||
"Repositories": [
|
"Repositories": [
|
||||||
{
|
{
|
||||||
"Name": "CRAN",
|
"Name": "CRAN",
|
||||||
|
@ -46,10 +46,10 @@
|
||||||
},
|
},
|
||||||
"Hmisc": {
|
"Hmisc": {
|
||||||
"Package": "Hmisc",
|
"Package": "Hmisc",
|
||||||
"Version": "5.0-1",
|
"Version": "4.4-2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "bf9fe82c010a468fb32f913ff56d65e1"
|
"Hash": "66458e906b2112a8b1639964efd77d7c"
|
||||||
},
|
},
|
||||||
"KernSmooth": {
|
"KernSmooth": {
|
||||||
"Package": "KernSmooth",
|
"Package": "KernSmooth",
|
||||||
|
@ -104,7 +104,7 @@
|
||||||
"Package": "RPostgres",
|
"Package": "RPostgres",
|
||||||
"Version": "1.4.4",
|
"Version": "1.4.4",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "c593ecb8dbca9faf3906431be610ca28"
|
"Hash": "c593ecb8dbca9faf3906431be610ca28"
|
||||||
},
|
},
|
||||||
"Rcpp": {
|
"Rcpp": {
|
||||||
|
@ -181,7 +181,7 @@
|
||||||
"Package": "base64enc",
|
"Package": "base64enc",
|
||||||
"Version": "0.1-3",
|
"Version": "0.1-3",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "543776ae6848fde2f48ff3816d0628bc"
|
"Hash": "543776ae6848fde2f48ff3816d0628bc"
|
||||||
},
|
},
|
||||||
"bit": {
|
"bit": {
|
||||||
|
@ -221,24 +221,17 @@
|
||||||
},
|
},
|
||||||
"broom": {
|
"broom": {
|
||||||
"Package": "broom",
|
"Package": "broom",
|
||||||
"Version": "1.0.4",
|
"Version": "0.7.3",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "f62b2504021369a2449c54bbda362d30"
|
"Hash": "5581a5ddc8fe2ac5e0d092ec2de4c4ae"
|
||||||
},
|
|
||||||
"cachem": {
|
|
||||||
"Package": "cachem",
|
|
||||||
"Version": "1.0.7",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "cda74447c42f529de601fe4d4050daef"
|
|
||||||
},
|
},
|
||||||
"callr": {
|
"callr": {
|
||||||
"Package": "callr",
|
"Package": "callr",
|
||||||
"Version": "3.7.3",
|
"Version": "3.5.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "9b2191ede20fa29828139b9900922e51"
|
"Hash": "b7d7f1e926dfcd57c74ce93f5c048e80"
|
||||||
},
|
},
|
||||||
"caret": {
|
"caret": {
|
||||||
"Package": "caret",
|
"Package": "caret",
|
||||||
|
@ -270,10 +263,10 @@
|
||||||
},
|
},
|
||||||
"cli": {
|
"cli": {
|
||||||
"Package": "cli",
|
"Package": "cli",
|
||||||
"Version": "3.6.1",
|
"Version": "2.2.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "89e6d8219950eac806ae0c489052048a"
|
"Hash": "3ef298932294b775fa0a3eeaa3a645b0"
|
||||||
},
|
},
|
||||||
"clipr": {
|
"clipr": {
|
||||||
"Package": "clipr",
|
"Package": "clipr",
|
||||||
|
@ -293,7 +286,7 @@
|
||||||
"Package": "codetools",
|
"Package": "codetools",
|
||||||
"Version": "0.2-18",
|
"Version": "0.2-18",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "019388fc48e48b3da0d3a76ff94608a8"
|
"Hash": "019388fc48e48b3da0d3a76ff94608a8"
|
||||||
},
|
},
|
||||||
"colorspace": {
|
"colorspace": {
|
||||||
|
@ -310,13 +303,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "0f22be39ec1d141fd03683c06f3a6e67"
|
"Hash": "0f22be39ec1d141fd03683c06f3a6e67"
|
||||||
},
|
},
|
||||||
"conflicted": {
|
|
||||||
"Package": "conflicted",
|
|
||||||
"Version": "1.2.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "bb097fccb22d156624fd07cd2894ddb6"
|
|
||||||
},
|
|
||||||
"corpcor": {
|
"corpcor": {
|
||||||
"Package": "corpcor",
|
"Package": "corpcor",
|
||||||
"Version": "1.6.9",
|
"Version": "1.6.9",
|
||||||
|
@ -333,10 +319,10 @@
|
||||||
},
|
},
|
||||||
"cpp11": {
|
"cpp11": {
|
||||||
"Package": "cpp11",
|
"Package": "cpp11",
|
||||||
"Version": "0.4.3",
|
"Version": "0.2.4",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "ed588261931ee3be2c700d22e94a29ab"
|
"Hash": "ba66e5a750d39067d888aa7af797fed2"
|
||||||
},
|
},
|
||||||
"crayon": {
|
"crayon": {
|
||||||
"Package": "crayon",
|
"Package": "crayon",
|
||||||
|
@ -368,10 +354,10 @@
|
||||||
},
|
},
|
||||||
"dbplyr": {
|
"dbplyr": {
|
||||||
"Package": "dbplyr",
|
"Package": "dbplyr",
|
||||||
"Version": "2.3.2",
|
"Version": "2.1.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "CRAN",
|
||||||
"Hash": "d24305b92db333726aed162a2c23a147"
|
"Hash": "1f37fa4ab2f5f7eded42f78b9a887182"
|
||||||
},
|
},
|
||||||
"desc": {
|
"desc": {
|
||||||
"Package": "desc",
|
"Package": "desc",
|
||||||
|
@ -396,17 +382,17 @@
|
||||||
},
|
},
|
||||||
"dplyr": {
|
"dplyr": {
|
||||||
"Package": "dplyr",
|
"Package": "dplyr",
|
||||||
"Version": "1.1.1",
|
"Version": "1.0.5",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "eb5742d256a0d9306d85ea68756d8187"
|
"Hash": "d0d76c11ec807eb3f000eba4e3eb0f68"
|
||||||
},
|
},
|
||||||
"dtplyr": {
|
"dtplyr": {
|
||||||
"Package": "dtplyr",
|
"Package": "dtplyr",
|
||||||
"Version": "1.3.1",
|
"Version": "1.1.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "54ed3ea01b11e81a86544faaecfef8e2"
|
"Hash": "1e14e4c5b2814de5225312394bc316da"
|
||||||
},
|
},
|
||||||
"e1071": {
|
"e1071": {
|
||||||
"Package": "e1071",
|
"Package": "e1071",
|
||||||
|
@ -433,7 +419,7 @@
|
||||||
"Package": "evaluate",
|
"Package": "evaluate",
|
||||||
"Version": "0.14",
|
"Version": "0.14",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "ec8ca05cffcc70569eaaad8469d2a3a7"
|
"Hash": "ec8ca05cffcc70569eaaad8469d2a3a7"
|
||||||
},
|
},
|
||||||
"fansi": {
|
"fansi": {
|
||||||
|
@ -466,10 +452,10 @@
|
||||||
},
|
},
|
||||||
"forcats": {
|
"forcats": {
|
||||||
"Package": "forcats",
|
"Package": "forcats",
|
||||||
"Version": "1.0.0",
|
"Version": "0.5.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "1a0a9a3d5083d0d573c4214576f1e690"
|
"Hash": "1cb4279e697650f0bd78cd3601ee7576"
|
||||||
},
|
},
|
||||||
"foreach": {
|
"foreach": {
|
||||||
"Package": "foreach",
|
"Package": "foreach",
|
||||||
|
@ -506,13 +492,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "f568ce73d3d59582b0f7babd0eb33d07"
|
"Hash": "f568ce73d3d59582b0f7babd0eb33d07"
|
||||||
},
|
},
|
||||||
"gargle": {
|
|
||||||
"Package": "gargle",
|
|
||||||
"Version": "1.3.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "bb3208dcdfeb2e68bf33c87601b3cbe3"
|
|
||||||
},
|
|
||||||
"gclus": {
|
"gclus": {
|
||||||
"Package": "gclus",
|
"Package": "gclus",
|
||||||
"Version": "1.3.2",
|
"Version": "1.3.2",
|
||||||
|
@ -536,10 +515,10 @@
|
||||||
},
|
},
|
||||||
"ggplot2": {
|
"ggplot2": {
|
||||||
"Package": "ggplot2",
|
"Package": "ggplot2",
|
||||||
"Version": "3.4.1",
|
"Version": "3.3.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "d494daf77c4aa7f084dbbe6ca5dcaca7"
|
"Hash": "4ded8b439797f7b1693bd3d238d0106b"
|
||||||
},
|
},
|
||||||
"ggraph": {
|
"ggraph": {
|
||||||
"Package": "ggraph",
|
"Package": "ggraph",
|
||||||
|
@ -578,30 +557,16 @@
|
||||||
},
|
},
|
||||||
"glue": {
|
"glue": {
|
||||||
"Package": "glue",
|
"Package": "glue",
|
||||||
"Version": "1.6.2",
|
"Version": "1.4.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "4f2596dfb05dac67b9dc558e5c6fba2e"
|
"Hash": "6efd734b14c6471cfe443345f3e35e29"
|
||||||
},
|
|
||||||
"googledrive": {
|
|
||||||
"Package": "googledrive",
|
|
||||||
"Version": "2.1.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "e88ba642951bc8d1898ba0d12581850b"
|
|
||||||
},
|
|
||||||
"googlesheets4": {
|
|
||||||
"Package": "googlesheets4",
|
|
||||||
"Version": "1.1.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "fd7b97bd862a14297b0bb7ed28a3dada"
|
|
||||||
},
|
},
|
||||||
"gower": {
|
"gower": {
|
||||||
"Package": "gower",
|
"Package": "gower",
|
||||||
"Version": "0.2.2",
|
"Version": "0.2.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "be6a2b3529928bd803d1c437d1d43152"
|
"Hash": "be6a2b3529928bd803d1c437d1d43152"
|
||||||
},
|
},
|
||||||
"graphlayouts": {
|
"graphlayouts": {
|
||||||
|
@ -634,10 +599,10 @@
|
||||||
},
|
},
|
||||||
"haven": {
|
"haven": {
|
||||||
"Package": "haven",
|
"Package": "haven",
|
||||||
"Version": "2.5.2",
|
"Version": "2.3.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "8b331e659e67d757db0fcc28e689c501"
|
"Hash": "221d0ad75dfa03ebf17b1a4cc5c31dfc"
|
||||||
},
|
},
|
||||||
"highr": {
|
"highr": {
|
||||||
"Package": "highr",
|
"Package": "highr",
|
||||||
|
@ -648,10 +613,10 @@
|
||||||
},
|
},
|
||||||
"hms": {
|
"hms": {
|
||||||
"Package": "hms",
|
"Package": "hms",
|
||||||
"Version": "1.1.3",
|
"Version": "1.1.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "CRAN",
|
||||||
"Hash": "b59377caa7ed00fa41808342002138f9"
|
"Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca"
|
||||||
},
|
},
|
||||||
"htmlTable": {
|
"htmlTable": {
|
||||||
"Package": "htmlTable",
|
"Package": "htmlTable",
|
||||||
|
@ -683,10 +648,10 @@
|
||||||
},
|
},
|
||||||
"httr": {
|
"httr": {
|
||||||
"Package": "httr",
|
"Package": "httr",
|
||||||
"Version": "1.4.5",
|
"Version": "1.4.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "f6844033201269bec3ca0097bc6c97b3"
|
"Hash": "a525aba14184fec243f9eaec62fbed43"
|
||||||
},
|
},
|
||||||
"huge": {
|
"huge": {
|
||||||
"Package": "huge",
|
"Package": "huge",
|
||||||
|
@ -695,13 +660,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "a4cde4dd1d2551edb99a3273a4ad34ea"
|
"Hash": "a4cde4dd1d2551edb99a3273a4ad34ea"
|
||||||
},
|
},
|
||||||
"ids": {
|
|
||||||
"Package": "ids",
|
|
||||||
"Version": "1.0.1",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "99df65cfef20e525ed38c3d2577f7190"
|
|
||||||
},
|
|
||||||
"igraph": {
|
"igraph": {
|
||||||
"Package": "igraph",
|
"Package": "igraph",
|
||||||
"Version": "1.2.6",
|
"Version": "1.2.6",
|
||||||
|
@ -746,10 +704,10 @@
|
||||||
},
|
},
|
||||||
"jsonlite": {
|
"jsonlite": {
|
||||||
"Package": "jsonlite",
|
"Package": "jsonlite",
|
||||||
"Version": "1.8.4",
|
"Version": "1.7.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "a4269a09a9b865579b2635c77e572374"
|
"Hash": "98138e0994d41508c7a6b84a0600cfcb"
|
||||||
},
|
},
|
||||||
"knitr": {
|
"knitr": {
|
||||||
"Package": "knitr",
|
"Package": "knitr",
|
||||||
|
@ -802,10 +760,10 @@
|
||||||
},
|
},
|
||||||
"lifecycle": {
|
"lifecycle": {
|
||||||
"Package": "lifecycle",
|
"Package": "lifecycle",
|
||||||
"Version": "1.0.3",
|
"Version": "1.0.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "001cecbeac1cff9301bdc3775ee46a86"
|
"Hash": "3471fb65971f1a7b2d4ae7848cf2db8d"
|
||||||
},
|
},
|
||||||
"listenv": {
|
"listenv": {
|
||||||
"Package": "listenv",
|
"Package": "listenv",
|
||||||
|
@ -816,17 +774,17 @@
|
||||||
},
|
},
|
||||||
"lubridate": {
|
"lubridate": {
|
||||||
"Package": "lubridate",
|
"Package": "lubridate",
|
||||||
"Version": "1.9.2",
|
"Version": "1.7.9.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "e25f18436e3efd42c7c590a1c4c15390"
|
"Hash": "5b5b02f621d39a499def7923a5aee746"
|
||||||
},
|
},
|
||||||
"magrittr": {
|
"magrittr": {
|
||||||
"Package": "magrittr",
|
"Package": "magrittr",
|
||||||
"Version": "2.0.3",
|
"Version": "2.0.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "7ce2733a9826b3aeb1775d56fd305472"
|
"Hash": "41287f1ac7d28a92f0a286ed507928d3"
|
||||||
},
|
},
|
||||||
"markdown": {
|
"markdown": {
|
||||||
"Package": "markdown",
|
"Package": "markdown",
|
||||||
|
@ -842,13 +800,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "67101e7448dfd9add4ac418623060262"
|
"Hash": "67101e7448dfd9add4ac418623060262"
|
||||||
},
|
},
|
||||||
"memoise": {
|
|
||||||
"Package": "memoise",
|
|
||||||
"Version": "2.0.1",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c"
|
|
||||||
},
|
|
||||||
"mgcv": {
|
"mgcv": {
|
||||||
"Package": "mgcv",
|
"Package": "mgcv",
|
||||||
"Version": "1.8-33",
|
"Version": "1.8-33",
|
||||||
|
@ -879,10 +830,10 @@
|
||||||
},
|
},
|
||||||
"modelr": {
|
"modelr": {
|
||||||
"Package": "modelr",
|
"Package": "modelr",
|
||||||
"Version": "0.1.11",
|
"Version": "0.1.8",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "4f50122dc256b1b6996a4703fecea821"
|
"Hash": "9fd59716311ee82cba83dc2826fc5577"
|
||||||
},
|
},
|
||||||
"munsell": {
|
"munsell": {
|
||||||
"Package": "munsell",
|
"Package": "munsell",
|
||||||
|
@ -937,7 +888,7 @@
|
||||||
"Package": "parallelly",
|
"Package": "parallelly",
|
||||||
"Version": "1.29.0",
|
"Version": "1.29.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "b5f399c9ce96977e22ef32c20b6cfe87"
|
"Hash": "b5f399c9ce96977e22ef32c20b6cfe87"
|
||||||
},
|
},
|
||||||
"pbapply": {
|
"pbapply": {
|
||||||
|
@ -956,10 +907,10 @@
|
||||||
},
|
},
|
||||||
"pillar": {
|
"pillar": {
|
||||||
"Package": "pillar",
|
"Package": "pillar",
|
||||||
"Version": "1.9.0",
|
"Version": "1.4.7",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "15da5a8412f317beeee6175fbc76f4bb"
|
"Hash": "3b3dd89b2ee115a8b54e93a34cd546b4"
|
||||||
},
|
},
|
||||||
"pkgbuild": {
|
"pkgbuild": {
|
||||||
"Package": "pkgbuild",
|
"Package": "pkgbuild",
|
||||||
|
@ -1026,10 +977,10 @@
|
||||||
},
|
},
|
||||||
"processx": {
|
"processx": {
|
||||||
"Package": "processx",
|
"Package": "processx",
|
||||||
"Version": "3.8.0",
|
"Version": "3.4.5",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "a33ee2d9bf07564efb888ad98410da84"
|
"Hash": "22aab6098cb14edd0a5973a8438b569b"
|
||||||
},
|
},
|
||||||
"prodlim": {
|
"prodlim": {
|
||||||
"Package": "prodlim",
|
"Package": "prodlim",
|
||||||
|
@ -1049,7 +1000,7 @@
|
||||||
"Package": "progressr",
|
"Package": "progressr",
|
||||||
"Version": "0.9.0",
|
"Version": "0.9.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "RSPM",
|
"Repository": "CRAN",
|
||||||
"Hash": "ca0d80ecc29903f7579edbabd91f4199"
|
"Hash": "ca0d80ecc29903f7579edbabd91f4199"
|
||||||
},
|
},
|
||||||
"promises": {
|
"promises": {
|
||||||
|
@ -1082,10 +1033,10 @@
|
||||||
},
|
},
|
||||||
"purrr": {
|
"purrr": {
|
||||||
"Package": "purrr",
|
"Package": "purrr",
|
||||||
"Version": "1.0.1",
|
"Version": "0.3.4",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "d71c815267c640f17ddbf7f16144b4bb"
|
"Hash": "97def703420c8ab10d8f0e6c72101e02"
|
||||||
},
|
},
|
||||||
"qap": {
|
"qap": {
|
||||||
"Package": "qap",
|
"Package": "qap",
|
||||||
|
@ -1101,13 +1052,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "d35964686307333a7121eb41c7dcd4e0"
|
"Hash": "d35964686307333a7121eb41c7dcd4e0"
|
||||||
},
|
},
|
||||||
"ragg": {
|
|
||||||
"Package": "ragg",
|
|
||||||
"Version": "1.2.5",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "690bc058ea2b1b8a407d3cfe3dce3ef9"
|
|
||||||
},
|
|
||||||
"rappdirs": {
|
"rappdirs": {
|
||||||
"Package": "rappdirs",
|
"Package": "rappdirs",
|
||||||
"Version": "0.3.3",
|
"Version": "0.3.3",
|
||||||
|
@ -1117,17 +1061,17 @@
|
||||||
},
|
},
|
||||||
"readr": {
|
"readr": {
|
||||||
"Package": "readr",
|
"Package": "readr",
|
||||||
"Version": "2.1.4",
|
"Version": "1.4.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "b5047343b3825f37ad9d3b5d89aa1078"
|
"Hash": "2639976851f71f330264a9c9c3d43a61"
|
||||||
},
|
},
|
||||||
"readxl": {
|
"readxl": {
|
||||||
"Package": "readxl",
|
"Package": "readxl",
|
||||||
"Version": "1.4.2",
|
"Version": "1.3.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "2e6020b1399d95f947ed867045e9ca17"
|
"Hash": "63537c483c2dbec8d9e3183b3735254a"
|
||||||
},
|
},
|
||||||
"recipes": {
|
"recipes": {
|
||||||
"Package": "recipes",
|
"Package": "recipes",
|
||||||
|
@ -1166,10 +1110,10 @@
|
||||||
},
|
},
|
||||||
"reprex": {
|
"reprex": {
|
||||||
"Package": "reprex",
|
"Package": "reprex",
|
||||||
"Version": "2.0.2",
|
"Version": "0.3.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "d66fe009d4c20b7ab1927eb405db9ee2"
|
"Hash": "b06bfb3504cc8a4579fd5567646f745b"
|
||||||
},
|
},
|
||||||
"reshape2": {
|
"reshape2": {
|
||||||
"Package": "reshape2",
|
"Package": "reshape2",
|
||||||
|
@ -1194,10 +1138,10 @@
|
||||||
},
|
},
|
||||||
"rlang": {
|
"rlang": {
|
||||||
"Package": "rlang",
|
"Package": "rlang",
|
||||||
"Version": "1.1.0",
|
"Version": "0.4.10",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "dc079ccd156cde8647360f473c1fa718"
|
"Hash": "599df23c40a4fce9c7b4764f28c37857"
|
||||||
},
|
},
|
||||||
"rmarkdown": {
|
"rmarkdown": {
|
||||||
"Package": "rmarkdown",
|
"Package": "rmarkdown",
|
||||||
|
@ -1229,24 +1173,24 @@
|
||||||
},
|
},
|
||||||
"rstudioapi": {
|
"rstudioapi": {
|
||||||
"Package": "rstudioapi",
|
"Package": "rstudioapi",
|
||||||
"Version": "0.14",
|
"Version": "0.13",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "690bd2acc42a9166ce34845884459320"
|
"Hash": "06c85365a03fdaf699966cc1d3cf53ea"
|
||||||
},
|
},
|
||||||
"rvest": {
|
"rvest": {
|
||||||
"Package": "rvest",
|
"Package": "rvest",
|
||||||
"Version": "1.0.3",
|
"Version": "0.3.6",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "a4a5ac819a467808c60e36e92ddf195e"
|
"Hash": "a9795ccb2d608330e841998b67156764"
|
||||||
},
|
},
|
||||||
"scales": {
|
"scales": {
|
||||||
"Package": "scales",
|
"Package": "scales",
|
||||||
"Version": "1.2.1",
|
"Version": "1.1.1",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "906cb23d2f1c5680b8ce439b44c6fa63"
|
"Hash": "6f76f71042411426ec8df6c54f34e6dd"
|
||||||
},
|
},
|
||||||
"selectr": {
|
"selectr": {
|
||||||
"Package": "selectr",
|
"Package": "selectr",
|
||||||
|
@ -1292,17 +1236,17 @@
|
||||||
},
|
},
|
||||||
"stringi": {
|
"stringi": {
|
||||||
"Package": "stringi",
|
"Package": "stringi",
|
||||||
"Version": "1.7.12",
|
"Version": "1.5.3",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "ca8bd84263c77310739d2cf64d84d7c9"
|
"Hash": "a063ebea753c92910a4cca7b18bc1f05"
|
||||||
},
|
},
|
||||||
"stringr": {
|
"stringr": {
|
||||||
"Package": "stringr",
|
"Package": "stringr",
|
||||||
"Version": "1.5.0",
|
"Version": "1.4.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "CRAN",
|
||||||
"Hash": "671a4d384ae9d32fc47a14e98bfa3dc8"
|
"Hash": "0759e6b6c0957edb1311028a49a35e76"
|
||||||
},
|
},
|
||||||
"survival": {
|
"survival": {
|
||||||
"Package": "survival",
|
"Package": "survival",
|
||||||
|
@ -1318,13 +1262,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "b227d13e29222b4574486cfcbde077fa"
|
"Hash": "b227d13e29222b4574486cfcbde077fa"
|
||||||
},
|
},
|
||||||
"systemfonts": {
|
|
||||||
"Package": "systemfonts",
|
|
||||||
"Version": "1.0.4",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "90b28393209827327de889f49935140a"
|
|
||||||
},
|
|
||||||
"testthat": {
|
"testthat": {
|
||||||
"Package": "testthat",
|
"Package": "testthat",
|
||||||
"Version": "3.0.1",
|
"Version": "3.0.1",
|
||||||
|
@ -1332,19 +1269,12 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "17826764cb92d8b5aae6619896e5a161"
|
"Hash": "17826764cb92d8b5aae6619896e5a161"
|
||||||
},
|
},
|
||||||
"textshaping": {
|
|
||||||
"Package": "textshaping",
|
|
||||||
"Version": "0.3.6",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "1ab6223d3670fac7143202cb6a2d43d5"
|
|
||||||
},
|
|
||||||
"tibble": {
|
"tibble": {
|
||||||
"Package": "tibble",
|
"Package": "tibble",
|
||||||
"Version": "3.2.1",
|
"Version": "3.0.4",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "a84e2cc86d07289b3b6f5069df7a004c"
|
"Hash": "71dffd8544691c520dd8e41ed2d7e070"
|
||||||
},
|
},
|
||||||
"tidygraph": {
|
"tidygraph": {
|
||||||
"Package": "tidygraph",
|
"Package": "tidygraph",
|
||||||
|
@ -1355,24 +1285,24 @@
|
||||||
},
|
},
|
||||||
"tidyr": {
|
"tidyr": {
|
||||||
"Package": "tidyr",
|
"Package": "tidyr",
|
||||||
"Version": "1.3.0",
|
"Version": "1.1.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "e47debdc7ce599b070c8e78e8ac0cfcf"
|
"Hash": "c40b2d5824d829190f4b825f4496dfae"
|
||||||
},
|
},
|
||||||
"tidyselect": {
|
"tidyselect": {
|
||||||
"Package": "tidyselect",
|
"Package": "tidyselect",
|
||||||
"Version": "1.2.0",
|
"Version": "1.1.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "79540e5fcd9e0435af547d885f184fd5"
|
"Hash": "6ea435c354e8448819627cf686f66e0a"
|
||||||
},
|
},
|
||||||
"tidyverse": {
|
"tidyverse": {
|
||||||
"Package": "tidyverse",
|
"Package": "tidyverse",
|
||||||
"Version": "2.0.0",
|
"Version": "1.3.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "c328568cd14ea89a83bd4ca7f54ae07e"
|
"Hash": "bd51be662f359fa99021f3d51e911490"
|
||||||
},
|
},
|
||||||
"timeDate": {
|
"timeDate": {
|
||||||
"Package": "timeDate",
|
"Package": "timeDate",
|
||||||
|
@ -1381,13 +1311,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "fde4fc571f5f61978652c229d4713845"
|
"Hash": "fde4fc571f5f61978652c229d4713845"
|
||||||
},
|
},
|
||||||
"timechange": {
|
|
||||||
"Package": "timechange",
|
|
||||||
"Version": "0.2.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "8548b44f79a35ba1791308b61e6012d7"
|
|
||||||
},
|
|
||||||
"tinytex": {
|
"tinytex": {
|
||||||
"Package": "tinytex",
|
"Package": "tinytex",
|
||||||
"Version": "0.28",
|
"Version": "0.28",
|
||||||
|
@ -1409,13 +1332,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "fc77eb5297507cccfa3349a606061030"
|
"Hash": "fc77eb5297507cccfa3349a606061030"
|
||||||
},
|
},
|
||||||
"tzdb": {
|
|
||||||
"Package": "tzdb",
|
|
||||||
"Version": "0.3.0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e"
|
|
||||||
},
|
|
||||||
"utf8": {
|
"utf8": {
|
||||||
"Package": "utf8",
|
"Package": "utf8",
|
||||||
"Version": "1.1.4",
|
"Version": "1.1.4",
|
||||||
|
@ -1423,19 +1339,12 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "4a5081acfb7b81a572e4384a7aaf2af1"
|
"Hash": "4a5081acfb7b81a572e4384a7aaf2af1"
|
||||||
},
|
},
|
||||||
"uuid": {
|
|
||||||
"Package": "uuid",
|
|
||||||
"Version": "1.1-0",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "f1cb46c157d080b729159d407be83496"
|
|
||||||
},
|
|
||||||
"vctrs": {
|
"vctrs": {
|
||||||
"Package": "vctrs",
|
"Package": "vctrs",
|
||||||
"Version": "0.6.1",
|
"Version": "0.3.8",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "CRAN",
|
||||||
"Hash": "06eceb3a5d716fd0654cc23ca3d71a99"
|
"Hash": "ecf749a1b39ea72bd9b51b76292261f1"
|
||||||
},
|
},
|
||||||
"viridis": {
|
"viridis": {
|
||||||
"Package": "viridis",
|
"Package": "viridis",
|
||||||
|
@ -1451,13 +1360,6 @@
|
||||||
"Repository": "RSPM",
|
"Repository": "RSPM",
|
||||||
"Hash": "ce4f6271baa94776db692f1cb2055bee"
|
"Hash": "ce4f6271baa94776db692f1cb2055bee"
|
||||||
},
|
},
|
||||||
"vroom": {
|
|
||||||
"Package": "vroom",
|
|
||||||
"Version": "1.6.1",
|
|
||||||
"Source": "Repository",
|
|
||||||
"Repository": "CRAN",
|
|
||||||
"Hash": "7015a74373b83ffaef64023f4a0f5033"
|
|
||||||
},
|
|
||||||
"waldo": {
|
"waldo": {
|
||||||
"Package": "waldo",
|
"Package": "waldo",
|
||||||
"Version": "0.2.3",
|
"Version": "0.2.3",
|
||||||
|
@ -1474,10 +1376,10 @@
|
||||||
},
|
},
|
||||||
"withr": {
|
"withr": {
|
||||||
"Package": "withr",
|
"Package": "withr",
|
||||||
"Version": "2.5.0",
|
"Version": "2.3.0",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "c0e49a9760983e81e55cdd9be92e7182"
|
"Hash": "7307d79f58d1885b38c4f4f1a8cb19dd"
|
||||||
},
|
},
|
||||||
"xfun": {
|
"xfun": {
|
||||||
"Package": "xfun",
|
"Package": "xfun",
|
||||||
|
@ -1488,10 +1390,10 @@
|
||||||
},
|
},
|
||||||
"xml2": {
|
"xml2": {
|
||||||
"Package": "xml2",
|
"Package": "xml2",
|
||||||
"Version": "1.3.3",
|
"Version": "1.3.2",
|
||||||
"Source": "Repository",
|
"Source": "Repository",
|
||||||
"Repository": "CRAN",
|
"Repository": "RSPM",
|
||||||
"Hash": "40682ed6a969ea5abfd351eb67833adc"
|
"Hash": "d4d71a75dd3ea9eb5fa28cc21f9585e2"
|
||||||
},
|
},
|
||||||
"xtable": {
|
"xtable": {
|
||||||
"Package": "xtable",
|
"Package": "xtable",
|
||||||
|
|
|
@ -345,19 +345,6 @@ rule esm_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
rule phone_speech_python_features:
|
|
||||||
input:
|
|
||||||
sensor_data = "data/raw/{pid}/phone_speech_with_datetime.csv",
|
|
||||||
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
|
||||||
params:
|
|
||||||
provider = lambda wildcards: config["PHONE_SPEECH"]["PROVIDERS"][wildcards.provider_key.upper()],
|
|
||||||
provider_key = "{provider_key}",
|
|
||||||
sensor_key = "phone_speech"
|
|
||||||
output:
|
|
||||||
"data/interim/{pid}/phone_speech_features/phone_speech_python_{provider_key}.csv"
|
|
||||||
script:
|
|
||||||
"../src/features/entry.py"
|
|
||||||
|
|
||||||
rule phone_keyboard_python_features:
|
rule phone_keyboard_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv",
|
||||||
|
|
|
@ -247,8 +247,6 @@ rule empatica_readable_datetime:
|
||||||
include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||||
output:
|
output:
|
||||||
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
|
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
|
||||||
resources:
|
|
||||||
mem_mb=50000
|
|
||||||
script:
|
script:
|
||||||
"../src/data/datetime/readable_datetime.R"
|
"../src/data/datetime/readable_datetime.R"
|
||||||
|
|
||||||
|
@ -261,19 +259,16 @@ rule extract_event_information_from_esm:
|
||||||
stage = "extract",
|
stage = "extract",
|
||||||
pid = "{pid}"
|
pid = "{pid}"
|
||||||
output:
|
output:
|
||||||
"data/raw/ers/{pid}_ers.csv",
|
"data/raw/ers/{pid}_ers.csv"
|
||||||
"data/raw/ers/{pid}_stress_event_targets.csv"
|
|
||||||
script:
|
script:
|
||||||
"../src/features/phone_esm/straw/process_user_event_related_segments.py"
|
"../src/features/phone_esm/straw/process_user_event_related_segments.py"
|
||||||
|
|
||||||
rule merge_event_related_segments_files:
|
rule create_event_related_segments_file:
|
||||||
input:
|
input:
|
||||||
ers_files = expand("data/raw/ers/{pid}_ers.csv", pid=config["PIDS"]),
|
ers_files = expand("data/raw/ers/{pid}_ers.csv", pid=config["PIDS"])
|
||||||
se_files = expand("data/raw/ers/{pid}_stress_event_targets.csv", pid=config["PIDS"])
|
|
||||||
params:
|
params:
|
||||||
stage = "merge"
|
stage = "merge"
|
||||||
output:
|
output:
|
||||||
"data/external/straw_events.csv",
|
"data/external/straw_events.csv"
|
||||||
"data/external/stress_event_targets.csv"
|
|
||||||
script:
|
script:
|
||||||
"../src/features/phone_esm/straw/process_user_event_related_segments.py"
|
"../src/features/phone_esm/straw/process_user_event_related_segments.py"
|
|
@ -29,17 +29,24 @@ get_genre <- function(apps){
|
||||||
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
||||||
genre_catalogue <- data.frame()
|
genre_catalogue <- data.frame()
|
||||||
catalogue_source <- snakemake@params[["catalogue_source"]]
|
catalogue_source <- snakemake@params[["catalogue_source"]]
|
||||||
|
package_names_hashed <- snakemake@params[["package_names_hashed"]]
|
||||||
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
||||||
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
||||||
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
||||||
|
|
||||||
|
if (length(package_names_hashed) == 0) {package_names_hashed <- FALSE}
|
||||||
|
|
||||||
if(nrow(apps) > 0){
|
if(nrow(apps) > 0){
|
||||||
if(catalogue_source == "GOOGLE"){
|
if(catalogue_source == "GOOGLE"){
|
||||||
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
||||||
} else if(catalogue_source == "FILE"){
|
} else if(catalogue_source == "FILE"){
|
||||||
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
||||||
|
if (package_names_hashed) {
|
||||||
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
|
||||||
|
} else {
|
||||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
||||||
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
||||||
|
|
|
@ -349,24 +349,3 @@ PHONE_WIFI_VISIBLE:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
PHONE_SPEECH:
|
|
||||||
ANDROID:
|
|
||||||
RAPIDS_COLUMN_MAPPINGS:
|
|
||||||
TIMESTAMP: timestamp
|
|
||||||
DEVICE_ID: device_id
|
|
||||||
SPEECH_PROPORTION: speech_proportion
|
|
||||||
MUTATION:
|
|
||||||
COLUMN_MAPPINGS:
|
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
|
||||||
IOS:
|
|
||||||
RAPIDS_COLUMN_MAPPINGS:
|
|
||||||
TIMESTAMP: timestamp
|
|
||||||
DEVICE_ID: device_id
|
|
||||||
SPEECH_PROPORTION: speech_proportion
|
|
||||||
MUTATION:
|
|
||||||
COLUMN_MAPPINGS:
|
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -136,9 +136,8 @@ def patch_ibi_with_bvp(ibi_data, bvp_data):
|
||||||
# Begin with the cr-features part
|
# Begin with the cr-features part
|
||||||
try:
|
try:
|
||||||
ibi_data, ibi_start_timestamp = empatica2d_to_array(ibi_data_file)
|
ibi_data, ibi_start_timestamp = empatica2d_to_array(ibi_data_file)
|
||||||
except (IndexError, KeyError) as e:
|
except IndexError as e:
|
||||||
# Checks whether IBI.csv is empty
|
# Checks whether IBI.csv is empty
|
||||||
# It may raise a KeyError if df is empty here: startTimeStamp = df.time[0]
|
|
||||||
df_test = pd.read_csv(ibi_data_file, names=['timings', 'inter_beat_interval'], header=None)
|
df_test = pd.read_csv(ibi_data_file, names=['timings', 'inter_beat_interval'], header=None)
|
||||||
if df_test.empty:
|
if df_test.empty:
|
||||||
df_test['timestamp'] = df_test['timings']
|
df_test['timestamp'] = df_test['timings']
|
||||||
|
|
|
@ -118,11 +118,6 @@ PHONE_SCREEN:
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- SCREEN_STATUS
|
- SCREEN_STATUS
|
||||||
|
|
||||||
PHONE_SPEECH:
|
|
||||||
- TIMESTAMP
|
|
||||||
- DEVICE_ID
|
|
||||||
- SPEECH_PROPORTION
|
|
||||||
|
|
||||||
PHONE_WIFI_CONNECTED:
|
PHONE_WIFI_CONNECTED:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
|
|
|
@ -14,6 +14,7 @@ from src.features import empatica_data_yield as edy
|
||||||
pd.set_option('display.max_columns', 20)
|
pd.set_option('display.max_columns', 20)
|
||||||
|
|
||||||
def straw_cleaning(sensor_data_files, provider):
|
def straw_cleaning(sensor_data_files, provider):
|
||||||
|
# TODO (maybe): reorganize the script based on the overall
|
||||||
|
|
||||||
features = pd.read_csv(sensor_data_files["sensor_data"][0])
|
features = pd.read_csv(sensor_data_files["sensor_data"][0])
|
||||||
|
|
||||||
|
@ -27,18 +28,12 @@ def straw_cleaning(sensor_data_files, provider):
|
||||||
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
||||||
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
||||||
target = config['PARAMS_FOR_ANALYSIS']['TARGET']['LABEL'] # get target label from config
|
target = config['PARAMS_FOR_ANALYSIS']['TARGET']['LABEL'] # get target label from config
|
||||||
if 'phone_esm_straw_' + target in features:
|
|
||||||
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
||||||
else:
|
|
||||||
return features
|
|
||||||
|
|
||||||
# (2.1) QUALITY CHECK (DATA YIELD COLUMN) deletes the rows where E4 or phone data is low quality
|
# (2.1) QUALITY CHECK (DATA YIELD COLUMN) deletes the rows where E4 or phone data is low quality
|
||||||
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
||||||
phone_data_yield_column = "phone_data_yield_rapids_ratiovalidyielded" + phone_data_yield_unit
|
phone_data_yield_column = "phone_data_yield_rapids_ratiovalidyielded" + phone_data_yield_unit
|
||||||
|
|
||||||
if features.empty:
|
|
||||||
return features
|
|
||||||
|
|
||||||
features = edy.calculate_empatica_data_yield(features)
|
features = edy.calculate_empatica_data_yield(features)
|
||||||
|
|
||||||
if not phone_data_yield_column in features.columns and not "empatica_data_yield" in features.columns:
|
if not phone_data_yield_column in features.columns and not "empatica_data_yield" in features.columns:
|
||||||
|
@ -120,7 +115,7 @@ def straw_cleaning(sensor_data_files, provider):
|
||||||
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')]
|
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')]
|
||||||
|
|
||||||
if provider["COLS_VAR_THRESHOLD"]:
|
if provider["COLS_VAR_THRESHOLD"]:
|
||||||
features.drop(features.std(numeric_only=True)[features.std(numeric_only=True) == 0].index.values, axis=1, inplace=True)
|
features.drop(features.std()[features.std() == 0].index.values, axis=1, inplace=True)
|
||||||
|
|
||||||
fe5 = features.copy()
|
fe5 = features.copy()
|
||||||
|
|
||||||
|
@ -134,7 +129,7 @@ def straw_cleaning(sensor_data_files, provider):
|
||||||
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
|
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
|
||||||
|
|
||||||
corr_matrix = valid_features.corr().abs()
|
corr_matrix = valid_features.corr().abs()
|
||||||
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
|
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
|
||||||
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
|
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
|
||||||
|
|
||||||
features.drop(to_drop, axis=1, inplace=True)
|
features.drop(to_drop, axis=1, inplace=True)
|
||||||
|
@ -144,21 +139,21 @@ def straw_cleaning(sensor_data_files, provider):
|
||||||
if esm not in features:
|
if esm not in features:
|
||||||
features[esm] = esm_cols[esm]
|
features[esm] = esm_cols[esm]
|
||||||
|
|
||||||
|
fe6 = features.copy()
|
||||||
|
|
||||||
# (9) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
# (9) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||||
if features.isna().any().any():
|
if features.isna().any().any():
|
||||||
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
def impute(df, method='zero'):
|
||||||
|
|
||||||
def k_nearest(df):
|
def k_nearest(df):
|
||||||
pd.set_option('display.max_columns', None)
|
pd.set_option('display.max_columns', None)
|
||||||
imputer = KNNImputer(n_neighbors=3)
|
imputer = KNNImputer(n_neighbors=3)
|
||||||
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
|
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
|
||||||
|
|
||||||
|
|
||||||
def impute(df, method='zero'):
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'zero': df.fillna(0),
|
'zero': df.fillna(0),
|
||||||
'high_number': df.fillna(1500),
|
'high_number': df.fillna(1500),
|
||||||
|
@ -167,7 +162,6 @@ def impute(df, method='zero'):
|
||||||
'knn': k_nearest(df)
|
'knn': k_nearest(df)
|
||||||
}[method]
|
}[method]
|
||||||
|
|
||||||
|
|
||||||
def graph_bf_af(features, phase_name, plt_flag=False):
|
def graph_bf_af(features, phase_name, plt_flag=False):
|
||||||
if plt_flag:
|
if plt_flag:
|
||||||
sns.set(rc={"figure.figsize":(16, 8)})
|
sns.set(rc={"figure.figsize":(16, 8)})
|
||||||
|
|
|
@ -14,41 +14,21 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
|
|
||||||
features = pd.read_csv(sensor_data_files["sensor_data"][0])
|
features = pd.read_csv(sensor_data_files["sensor_data"][0])
|
||||||
|
|
||||||
with open('config.yaml', 'r') as stream:
|
# features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih časovnih segmentov
|
||||||
config = yaml.load(stream, Loader=yaml.FullLoader)
|
|
||||||
|
# print(features)
|
||||||
|
# sys.exit()
|
||||||
|
|
||||||
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
|
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
|
||||||
|
|
||||||
|
with open('config.yaml', 'r') as stream:
|
||||||
|
config = yaml.load(stream, Loader=yaml.FullLoader)
|
||||||
|
|
||||||
excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime']
|
excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime']
|
||||||
|
|
||||||
graph_bf_af(features, "1target_rows_before")
|
graph_bf_af(features, "1target_rows_before")
|
||||||
|
|
||||||
# (1.0) OVERRIDE STRESSFULNESS EVENT TARGETS IF ERS SEGMENTING_METHOD IS "STRESS_EVENT"
|
# (1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
||||||
if config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"] == "stress_event":
|
|
||||||
|
|
||||||
stress_events_targets = pd.read_csv("data/external/stress_event_targets.csv")
|
|
||||||
|
|
||||||
if "appraisal_stressfulness_event_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
|
|
||||||
features.drop(columns=['phone_esm_straw_appraisal_stressfulness_event_mean'], inplace=True)
|
|
||||||
features = features.merge(stress_events_targets[["label", "appraisal_stressfulness_event"]] \
|
|
||||||
.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
|
|
||||||
.rename(columns={'appraisal_stressfulness_event': 'phone_esm_straw_appraisal_stressfulness_event_mean'})
|
|
||||||
|
|
||||||
if "appraisal_threat_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
|
|
||||||
features.drop(columns=['phone_esm_straw_appraisal_threat_mean'], inplace=True)
|
|
||||||
features = features.merge(stress_events_targets[["label", "appraisal_threat"]] \
|
|
||||||
.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
|
|
||||||
.rename(columns={'appraisal_threat': 'phone_esm_straw_appraisal_threat_mean'})
|
|
||||||
|
|
||||||
if "appraisal_challenge_mean" in config['PARAMS_FOR_ANALYSIS']['TARGET']['ALL_LABELS']:
|
|
||||||
features.drop(columns=['phone_esm_straw_appraisal_challenge_mean'], inplace=True)
|
|
||||||
features = features.merge(stress_events_targets[["label", "appraisal_challenge"]] \
|
|
||||||
.rename(columns={'label': 'local_segment_label'}), on=['local_segment_label'], how='inner') \
|
|
||||||
.rename(columns={'appraisal_challenge': 'phone_esm_straw_appraisal_challenge_mean'})
|
|
||||||
|
|
||||||
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
|
|
||||||
|
|
||||||
# (1.1) FILTER_OUT THE ROWS THAT DO NOT HAVE THE TARGET COLUMN AVAILABLE
|
|
||||||
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
if config['PARAMS_FOR_ANALYSIS']['TARGET']['COMPUTE']:
|
||||||
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
features = features[features['phone_esm_straw_' + target].notna()].reset_index(drop=True)
|
||||||
|
|
||||||
|
@ -56,6 +36,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
return pd.DataFrame(columns=excluded_columns)
|
return pd.DataFrame(columns=excluded_columns)
|
||||||
|
|
||||||
graph_bf_af(features, "2target_rows_after")
|
graph_bf_af(features, "2target_rows_after")
|
||||||
|
print("HERE1", target, features["pid"])
|
||||||
|
|
||||||
# (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality
|
# (2) QUALITY CHECK (DATA YIELD COLUMN) drops the rows where E4 or phone data is low quality
|
||||||
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
phone_data_yield_unit = provider["PHONE_DATA_YIELD_FEATURE"].split("_")[3].lower()
|
||||||
|
@ -71,23 +52,26 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
|
|
||||||
# Drop rows where phone data yield is less then given threshold
|
# Drop rows where phone data yield is less then given threshold
|
||||||
if provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]:
|
if provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]:
|
||||||
|
# print("\nThreshold:", provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"])
|
||||||
|
# print("Phone features data yield stats:", features[phone_data_yield_column].describe(), "\n")
|
||||||
|
# print(features[phone_data_yield_column].sort_values())
|
||||||
hist = features[phone_data_yield_column].hist(bins=5)
|
hist = features[phone_data_yield_column].hist(bins=5)
|
||||||
plt.close()
|
plt.close()
|
||||||
features = features[features[phone_data_yield_column] >= provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
|
features = features[features[phone_data_yield_column] >= provider["PHONE_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
|
||||||
|
|
||||||
# Drop rows where empatica data yield is less then given threshold
|
# Drop rows where empatica data yield is less then given threshold
|
||||||
if provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]:
|
if provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]:
|
||||||
|
# print("\nThreshold:", provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"])
|
||||||
|
# print("E4 features data yield stats:", features["empatica_data_yield"].describe(), "\n")
|
||||||
|
# print(features["empatica_data_yield"].sort_values())
|
||||||
features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
|
features = features[features["empatica_data_yield"] >= provider["EMPATICA_DATA_YIELD_RATIO_THRESHOLD"]].reset_index(drop=True)
|
||||||
|
|
||||||
if features.empty:
|
|
||||||
return pd.DataFrame(columns=excluded_columns)
|
|
||||||
|
|
||||||
graph_bf_af(features, "3data_yield_drop_rows")
|
graph_bf_af(features, "3data_yield_drop_rows")
|
||||||
|
|
||||||
if features.empty:
|
if features.empty:
|
||||||
return pd.DataFrame(columns=excluded_columns)
|
return pd.DataFrame(columns=excluded_columns)
|
||||||
|
|
||||||
|
|
||||||
# (3) CONTEXTUAL IMPUTATION
|
# (3) CONTEXTUAL IMPUTATION
|
||||||
|
|
||||||
# Impute selected phone features with a high number
|
# Impute selected phone features with a high number
|
||||||
|
@ -109,31 +93,20 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
features[impute_w_sn2] = features[impute_w_sn2].fillna(1) # Special case of imputation - nominal/ordinal value
|
features[impute_w_sn2] = features[impute_w_sn2].fillna(1) # Special case of imputation - nominal/ordinal value
|
||||||
|
|
||||||
impute_w_sn3 = [col for col in features.columns if "loglocationvariance" in col]
|
impute_w_sn3 = [col for col in features.columns if "loglocationvariance" in col]
|
||||||
features[impute_w_sn3] = features[impute_w_sn3].fillna(-1000000) # Special case of imputation - loglocation
|
features[impute_w_sn2] = features[impute_w_sn2].fillna(-1000000) # Special case of imputation - loglocation
|
||||||
|
|
||||||
# Impute location features
|
# Impute selected phone features with 0 + impute ESM features with 0
|
||||||
impute_locations = [col for col in features \
|
|
||||||
if col.startswith('phone_locations_doryab_') and
|
|
||||||
'radiusgyration' not in col
|
|
||||||
]
|
|
||||||
|
|
||||||
# Impute selected phone, location, and esm features with 0
|
|
||||||
impute_zero = [col for col in features if \
|
impute_zero = [col for col in features if \
|
||||||
col.startswith('phone_applications_foreground_rapids_') or
|
col.startswith('phone_applications_foreground_rapids_') or
|
||||||
col.startswith('phone_activity_recognition_') or
|
|
||||||
col.startswith('phone_battery_rapids_') or
|
col.startswith('phone_battery_rapids_') or
|
||||||
col.startswith('phone_bluetooth_rapids_') or
|
col.startswith('phone_bluetooth_rapids_') or
|
||||||
col.startswith('phone_light_rapids_') or
|
col.startswith('phone_light_rapids_') or
|
||||||
col.startswith('phone_calls_rapids_') or
|
col.startswith('phone_calls_rapids_') or
|
||||||
col.startswith('phone_messages_rapids_') or
|
col.startswith('phone_messages_rapids_') or
|
||||||
col.startswith('phone_screen_rapids_') or
|
col.startswith('phone_screen_rapids_') or
|
||||||
col.startswith('phone_bluetooth_doryab_') or
|
col.startswith('phone_wifi_visible')]
|
||||||
col.startswith('phone_wifi_visible')
|
|
||||||
]
|
|
||||||
|
|
||||||
features[impute_zero+impute_locations+list(esm_cols.columns)] = features[impute_zero+impute_locations+list(esm_cols.columns)].fillna(0)
|
features[impute_zero+list(esm_cols.columns)] = features[impute_zero+list(esm_cols.columns)].fillna(0)
|
||||||
|
|
||||||
pd.set_option('display.max_rows', None)
|
|
||||||
|
|
||||||
graph_bf_af(features, "4context_imp")
|
graph_bf_af(features, "4context_imp")
|
||||||
|
|
||||||
|
@ -146,7 +119,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
# (5) REMOVE COLS WHERE VARIANCE IS 0
|
# (5) REMOVE COLS WHERE VARIANCE IS 0
|
||||||
|
|
||||||
if provider["COLS_VAR_THRESHOLD"]:
|
if provider["COLS_VAR_THRESHOLD"]:
|
||||||
features.drop(features.std(numeric_only=True)[features.std(numeric_only=True) == 0].index.values, axis=1, inplace=True)
|
features.drop(features.std()[features.std() == 0].index.values, axis=1, inplace=True)
|
||||||
|
|
||||||
graph_bf_af(features, "6variance_drop")
|
graph_bf_af(features, "6variance_drop")
|
||||||
|
|
||||||
|
@ -164,18 +137,15 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
if features.empty:
|
if features.empty:
|
||||||
return pd.DataFrame(columns=excluded_columns)
|
return pd.DataFrame(columns=excluded_columns)
|
||||||
|
|
||||||
# (7) STANDARDIZATION
|
|
||||||
|
# (7) STANDARDIZATION TODO: exclude nominal features from standardization
|
||||||
|
|
||||||
if provider["STANDARDIZATION"]:
|
if provider["STANDARDIZATION"]:
|
||||||
nominal_cols = [col for col in features.columns if "mostcommonactivity" in col or "homelabel" in col] # Excluded nominal features
|
|
||||||
# Expected warning within this code block
|
# Expected warning within this code block
|
||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
warnings.simplefilter("ignore", category=RuntimeWarning)
|
warnings.simplefilter("ignore", category=RuntimeWarning)
|
||||||
if provider["TARGET_STANDARDIZATION"]:
|
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"])] = \
|
||||||
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"] + nominal_cols)] = \
|
features.loc[:, ~features.columns.isin(excluded_columns)].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
|
||||||
features.loc[:, ~features.columns.isin(excluded_columns + nominal_cols)].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
|
|
||||||
else:
|
|
||||||
features.loc[:, ~features.columns.isin(excluded_columns + ["pid"] + nominal_cols + ['phone_esm_straw_' + target])] = \
|
|
||||||
features.loc[:, ~features.columns.isin(excluded_columns + nominal_cols + ['phone_esm_straw_' + target])].groupby('pid').transform(lambda x: StandardScaler().fit_transform(x.values[:,np.newaxis]).ravel())
|
|
||||||
|
|
||||||
graph_bf_af(features, "8standardization")
|
graph_bf_af(features, "8standardization")
|
||||||
|
|
||||||
|
@ -200,7 +170,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
|
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
|
||||||
|
|
||||||
corr_matrix = valid_features.corr().abs()
|
corr_matrix = valid_features.corr().abs()
|
||||||
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
|
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
|
||||||
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
|
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
|
||||||
|
|
||||||
# sns.heatmap(corr_matrix, cmap="YlGnBu")
|
# sns.heatmap(corr_matrix, cmap="YlGnBu")
|
||||||
|
@ -223,36 +193,18 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
|
|
||||||
graph_bf_af(features, "10correlation_drop")
|
graph_bf_af(features, "10correlation_drop")
|
||||||
|
|
||||||
# Transform categorical columns to category dtype
|
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||||
|
|
||||||
cat1 = [col for col in features.columns if "mostcommonactivity" in col]
|
|
||||||
if cat1: # Transform columns to category dtype (mostcommonactivity)
|
|
||||||
features[cat1] = features[cat1].astype(int).astype('category')
|
|
||||||
|
|
||||||
cat2 = [col for col in features.columns if "homelabel" in col]
|
|
||||||
if cat2: # Transform columns to category dtype (homelabel)
|
|
||||||
features[cat2] = features[cat2].astype(int).astype('category')
|
|
||||||
|
|
||||||
# (10) DROP ALL WINDOW RELATED COLUMNS
|
|
||||||
win_count_cols = [col for col in features if "SO_windowsCount" in col]
|
|
||||||
if win_count_cols:
|
|
||||||
features.drop(columns=win_count_cols, inplace=True)
|
|
||||||
|
|
||||||
# (11) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
|
||||||
if features.isna().any().any():
|
if features.isna().any().any():
|
||||||
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
raise ValueError("There are still some NaNs present in the dataframe. Please check for implementation errors.")
|
||||||
|
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
def impute(df, method='zero'):
|
||||||
|
|
||||||
def k_nearest(df):
|
def k_nearest(df):
|
||||||
imputer = KNNImputer(n_neighbors=3)
|
imputer = KNNImputer(n_neighbors=3)
|
||||||
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
|
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
|
||||||
|
|
||||||
|
|
||||||
def impute(df, method='zero'):
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'zero': df.fillna(0),
|
'zero': df.fillna(0),
|
||||||
'high_number': df.fillna(1500),
|
'high_number': df.fillna(1500),
|
||||||
|
@ -261,8 +213,7 @@ def impute(df, method='zero'):
|
||||||
'knn': k_nearest(df)
|
'knn': k_nearest(df)
|
||||||
}[method]
|
}[method]
|
||||||
|
|
||||||
|
def graph_bf_af(features, phase_name, plt_flag=True):
|
||||||
def graph_bf_af(features, phase_name, plt_flag=False):
|
|
||||||
if plt_flag:
|
if plt_flag:
|
||||||
sns.set(rc={"figure.figsize":(16, 8)})
|
sns.set(rc={"figure.figsize":(16, 8)})
|
||||||
sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)
|
sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)
|
||||||
|
|
|
@ -15,13 +15,13 @@ def extract_second_order_features(intraday_features, so_features_names, prefix="
|
||||||
so_features = pd.DataFrame()
|
so_features = pd.DataFrame()
|
||||||
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
|
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
|
||||||
if "mean" in so_features_names:
|
if "mean" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean(numeric_only=True).add_suffix("_SO_mean")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean().add_suffix("_SO_mean")], axis=1)
|
||||||
|
|
||||||
if "median" in so_features_names:
|
if "median" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median(numeric_only=True).add_suffix("_SO_median")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median().add_suffix("_SO_median")], axis=1)
|
||||||
|
|
||||||
if "sd" in so_features_names:
|
if "sd" in so_features_names:
|
||||||
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std(numeric_only=True).fillna(0).add_suffix("_SO_sd")], axis=1)
|
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std().fillna(0).add_suffix("_SO_sd")], axis=1)
|
||||||
|
|
||||||
if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
|
if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
|
||||||
for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
|
for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
|
||||||
|
|
|
@ -2,31 +2,25 @@ import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import sys, yaml
|
import sys
|
||||||
|
|
||||||
def calculate_empatica_data_yield(features): # TODO
|
def calculate_empatica_data_yield(features):
|
||||||
|
# Get time segment duration in seconds from dataframe
|
||||||
|
datetime_start = datetime.strptime(features.loc[0, 'local_segment_start_datetime'], '%Y-%m-%d %H:%M:%S')
|
||||||
|
datetime_end = datetime.strptime(features.loc[0, 'local_segment_end_datetime'], '%Y-%m-%d %H:%M:%S')
|
||||||
|
tseg_duration = (datetime_end - datetime_start).total_seconds()
|
||||||
|
|
||||||
# Get time segment duration in seconds from all segments in features dataframe
|
features["acc_data_yield"] = (features['empatica_accelerometer_cr_SO_windowsCount'] * 15) / tseg_duration \
|
||||||
datetime_start = pd.to_datetime(features['local_segment_start_datetime'], format='%Y-%m-%d %H:%M:%S')
|
if 'empatica_accelerometer_cr_SO_windowsCount' in features else 0
|
||||||
datetime_end = pd.to_datetime(features['local_segment_end_datetime'], format='%Y-%m-%d %H:%M:%S')
|
features["temp_data_yield"] = (features['empatica_temperature_cr_SO_windowsCount'] * 300) / tseg_duration \
|
||||||
tseg_duration = (datetime_end - datetime_start).dt.total_seconds()
|
if 'empatica_temperature_cr_SO_windowsCount' in features else 0
|
||||||
|
features["eda_data_yield"] = (features['empatica_electrodermal_activity_cr_SO_windowsCount'] * 60) / tseg_duration \
|
||||||
|
if 'empatica_electrodermal_activity_cr_SO_windowsCount' in features else 0
|
||||||
|
features["ibi_data_yield"] = (features['empatica_inter_beat_interval_cr_SO_windowsCount'] * 300) / tseg_duration \
|
||||||
|
if 'empatica_inter_beat_interval_cr_SO_windowsCount' in features else 0
|
||||||
|
|
||||||
with open('config.yaml', 'r') as stream:
|
empatica_data_yield_cols = ['acc_data_yield', 'temp_data_yield', 'eda_data_yield', 'ibi_data_yield']
|
||||||
config = yaml.load(stream, Loader=yaml.FullLoader)
|
features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1).fillna(0)
|
||||||
|
|
||||||
sensors = ["EMPATICA_ACCELEROMETER", "EMPATICA_TEMPERATURE", "EMPATICA_ELECTRODERMAL_ACTIVITY", "EMPATICA_INTER_BEAT_INTERVAL"]
|
|
||||||
for sensor in sensors:
|
|
||||||
features[f"{sensor.lower()}_data_yield"] = \
|
|
||||||
(features[f"{sensor.lower()}_cr_SO_windowsCount"] * config[sensor]["PROVIDERS"]["CR"]["WINDOWS"]["WINDOW_LENGTH"]) / tseg_duration \
|
|
||||||
if f'{sensor.lower()}_cr_SO_windowsCount' in features else 0
|
|
||||||
|
|
||||||
empatica_data_yield_cols = [sensor.lower() + "_data_yield" for sensor in sensors]
|
|
||||||
pd.set_option('display.max_rows', None)
|
|
||||||
|
|
||||||
# Assigns 1 to values that are over 1 (in case of windows not being filled fully)
|
|
||||||
features[empatica_data_yield_cols] = features[empatica_data_yield_cols].apply(lambda x: [y if y <= 1 or np.isnan(y) else 1 for y in x])
|
|
||||||
|
|
||||||
features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1, numeric_only=True).fillna(0)
|
|
||||||
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
|
||||||
|
|
||||||
return features
|
return features
|
||||||
|
|
|
@ -54,7 +54,8 @@ def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segmen
|
||||||
data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'inter_beat_interval': 'float64', 'timings': 'float64', 'local_date_time': 'str',
|
data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'inter_beat_interval': 'float64', 'timings': 'float64', 'local_date_time': 'str',
|
||||||
'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
|
'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
|
||||||
|
|
||||||
ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
|
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
|
||||||
|
ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||||
|
|
||||||
requested_intraday_features = provider["FEATURES"]
|
requested_intraday_features = provider["FEATURES"]
|
||||||
|
|
||||||
|
|
|
@ -42,8 +42,7 @@ def straw_features(sensor_data_files, time_segment, provider, filter_data_by_seg
|
||||||
requested_features = provider["FEATURES"]
|
requested_features = provider["FEATURES"]
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
requested_scales = provider["SCALES"]
|
requested_scales = provider["SCALES"]
|
||||||
base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
|
base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support"]
|
||||||
"appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
|
|
||||||
#TODO Check valid questionnaire and feature names.
|
#TODO Check valid questionnaire and feature names.
|
||||||
# the subset of requested features this function can compute
|
# the subset of requested features this function can compute
|
||||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||||
|
|
|
@ -10,15 +10,6 @@ from esm import classify_sessions_by_completion_time, preprocess_esm
|
||||||
input_data_files = dict(snakemake.input)
|
input_data_files = dict(snakemake.input)
|
||||||
|
|
||||||
def format_timestamp(x):
|
def format_timestamp(x):
|
||||||
"""This method formates inputed timestamp into format "HH MM SS". Including spaces. If there is no hours or minutes present
|
|
||||||
that part is ignored, e.g., "MM SS" or just "SS".
|
|
||||||
|
|
||||||
Args:
|
|
||||||
x (int): time span in seconds (a duration, not an absolute Unix timestamp)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: formatted timestamp using "HH MM SS" syntax
|
|
||||||
"""
|
|
||||||
tstring=""
|
tstring=""
|
||||||
space = False
|
space = False
|
||||||
if x//3600 > 0:
|
if x//3600 > 0:
|
||||||
|
@ -32,217 +23,60 @@ def format_timestamp(x):
|
||||||
return tstring
|
return tstring
|
||||||
|
|
||||||
|
|
||||||
def extract_ers(esm_df):
|
def extract_ers_from_file(esm_df, device_id): # TODO: session_id groupby -> change the segment name
|
||||||
"""This method has two major functionalities:
|
|
||||||
(1) It prepares the STRAW event-related segments file from the ESM file. The execution protocol depends on
|
|
||||||
the segmenting method specified in the config.yaml file.
|
|
||||||
(2) It prepares and writes csv with targets and corresponding time segments labels. This is later used
|
|
||||||
in the overall cleaning script (straw).
|
|
||||||
|
|
||||||
Details about each segmenting method are listed below by each corresponding condition. Refer to the RAPIDS documentation for the
|
pd.set_option("display.max_rows", None)
|
||||||
ERS file format: https://www.rapids.science/1.9/setup/configuration/#time-segments -> event segments
|
|
||||||
|
|
||||||
Args:
|
|
||||||
esm_df (DataFrame): the ESM data read from the file of the current participant.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
extracted_ers (DataFrame): dataframe with all necessary information to write event-related segments file
|
|
||||||
in the correct format.
|
|
||||||
"""
|
|
||||||
|
|
||||||
pd.set_option("display.max_rows", 100)
|
|
||||||
pd.set_option("display.max_columns", None)
|
pd.set_option("display.max_columns", None)
|
||||||
|
|
||||||
with open('config.yaml', 'r') as stream:
|
# extracted_ers = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"])
|
||||||
config = yaml.load(stream, Loader=yaml.FullLoader)
|
|
||||||
|
|
||||||
pd.DataFrame(columns=["label"]).to_csv(snakemake.output[1]) # Create an empty stress_events_targets file
|
|
||||||
|
|
||||||
|
# esm_df = clean_up_esm(preprocess_esm(esm_df))
|
||||||
esm_preprocessed = clean_up_esm(preprocess_esm(esm_df))
|
esm_preprocessed = clean_up_esm(preprocess_esm(esm_df))
|
||||||
|
|
||||||
|
# Take only during work sessions
|
||||||
|
# during_work = esm_df[esm_df["esm_trigger"].str.contains("during_work", na=False)]
|
||||||
|
# esm_trigger_group = esm_df.groupby("esm_session").agg(pd.Series.mode)['esm_trigger'] # Get most frequent esm_trigger within particular session
|
||||||
|
# esm_filtered_sessions = list(esm_trigger_group[esm_trigger_group == 'during_work'].index) # Take only sessions that contains during work
|
||||||
|
|
||||||
# Take only ema_completed sessions responses
|
# Take only ema_completed sessions responses
|
||||||
classified = classify_sessions_by_completion_time(esm_preprocessed)
|
classified = classify_sessions_by_completion_time(esm_preprocessed)
|
||||||
esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()[['device_id', 'esm_session']]
|
esm_filtered_sessions = classified[classified["session_response"] == 'ema_completed'].reset_index()['esm_session']
|
||||||
esm_df = esm_preprocessed.loc[(esm_preprocessed['device_id'].isin(esm_filtered_sessions['device_id'])) & (esm_preprocessed['esm_session'].isin(esm_filtered_sessions['esm_session']))]
|
|
||||||
|
|
||||||
segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"]
|
esm_df = esm_preprocessed[esm_preprocessed["esm_session"].isin(esm_filtered_sessions)]
|
||||||
|
|
||||||
if segmenting_method in ["30_before", "90_before"]: # takes the 30- or 90-minute period before the questionnaire + the duration of the questionnaire
|
|
||||||
""" '30-minutes and 90-minutes before' have the same fundamental logic with couple of deviations that will be explained below.
|
|
||||||
Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
|
|
||||||
All questionnaire durations over 15 minutes are excluded from the querying.
|
|
||||||
"""
|
|
||||||
# Extract time-relevant information
|
# Extract time-relevant information
|
||||||
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # questionnaire length
|
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index() # in rounded up seconds
|
||||||
extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
|
||||||
extracted_ers[['event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].min().reset_index()[['timestamp', 'device_id']]
|
|
||||||
extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire answering is 15 min
|
extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire answering is 15 min
|
||||||
extracted_ers["shift_direction"] = -1
|
# TODO: Rename "timestamp" column meaningfully.
|
||||||
|
|
||||||
if segmenting_method == "30_before":
|
|
||||||
"""The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
|
|
||||||
The timestamps are formatted with the help of format_timestamp() method.
|
|
||||||
"""
|
|
||||||
time_before_questionnaire = 30 * 60 # in seconds (30 minutes)
|
time_before_questionnaire = 30 * 60 # in seconds (30 minutes)
|
||||||
|
|
||||||
|
extracted_ers["label"] = "straw_event_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
||||||
|
extracted_ers["event_timestamp"] = esm_df.groupby("esm_session")['timestamp'].min().reset_index()['timestamp']
|
||||||
extracted_ers["length"] = (extracted_ers["timestamp"] + time_before_questionnaire).apply(lambda x: format_timestamp(x))
|
extracted_ers["length"] = (extracted_ers["timestamp"] + time_before_questionnaire).apply(lambda x: format_timestamp(x))
|
||||||
|
# TODO: Think about adding questionnaire duration.
|
||||||
extracted_ers["shift"] = time_before_questionnaire
|
extracted_ers["shift"] = time_before_questionnaire
|
||||||
extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
|
extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
|
||||||
|
extracted_ers["shift_direction"] = -1
|
||||||
elif segmenting_method == "90_before":
|
extracted_ers["device_id"] = device_id
|
||||||
"""The method 90-minutes before has an important condition. If the time between the current and the previous questionnaire is
|
|
||||||
longer than 90 minutes, it takes 90 minutes; otherwise it takes the original time difference between the questionnaires.
|
|
||||||
"""
|
|
||||||
time_before_questionnaire = 90 * 60 # in seconds (90 minutes)
|
|
||||||
|
|
||||||
extracted_ers[['end_event_timestamp', 'device_id']] = esm_df.groupby(["device_id", "esm_session"])['timestamp'].max().reset_index()[['timestamp', 'device_id']]
|
|
||||||
|
|
||||||
extracted_ers['diffs'] = extracted_ers['event_timestamp'].astype('int64') - extracted_ers['end_event_timestamp'].shift(1, fill_value=0).astype('int64')
|
|
||||||
extracted_ers.loc[extracted_ers['diffs'] > time_before_questionnaire * 1000, 'diffs'] = time_before_questionnaire * 1000
|
|
||||||
|
|
||||||
extracted_ers["diffs"] = (extracted_ers["diffs"] / 1000).apply(lambda x: math.ceil(x))
|
|
||||||
|
|
||||||
extracted_ers["length"] = (extracted_ers["timestamp"] + extracted_ers["diffs"]).apply(lambda x: format_timestamp(x))
|
|
||||||
extracted_ers["shift"] = extracted_ers["diffs"].apply(lambda x: format_timestamp(x))
|
|
||||||
|
|
||||||
elif segmenting_method == "stress_event":
|
|
||||||
"""
|
|
||||||
TODO: update documentation for this condition
|
|
||||||
This is a special case of the method as it consists of two important parts:
|
|
||||||
(1) Generating of the ERS file (same as the methods above) and
|
|
||||||
(2) Generating targets file alongside with the correct time segment labels.
|
|
||||||
|
|
||||||
This extracts event-related segments, depending on the event time and duration specified by the participant in the next
|
|
||||||
questionnaire. Additionally, 5 minutes before the specified start time of this event are taken to account for the
|
|
||||||
possibility of the participant not remembering the start time precisely => this parameter can be manipulated with the variable
|
|
||||||
"time_before_event" which is defined below.
|
|
||||||
|
|
||||||
If the participant marked that no stressful event happened, the default of 30 minutes before the event is chosen.
|
|
||||||
In this case, se_threat and se_challenge are NaN.
|
|
||||||
|
|
||||||
By default, this method also excludes all events that are longer than 2.5 hours so that the segments are easily comparable.
|
|
||||||
"""
|
|
||||||
|
|
||||||
ioi = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["INTERVAL_OF_INTEREST"] * 60 # interval of interest in seconds
|
|
||||||
ioi_error_tolerance = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["IOI_ERROR_TOLERANCE"] * 60 # interval of interest error tolerance in seconds
|
|
||||||
|
|
||||||
# Get and join required data
|
|
||||||
extracted_ers = esm_df.groupby(["device_id", "esm_session"])['timestamp'].apply(lambda x: math.ceil((x.max() - x.min()) / 1000)).reset_index().rename(columns={'timestamp': 'session_length'}) # questionnaire length
|
|
||||||
extracted_ers = extracted_ers[extracted_ers["session_length"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire answering is 15 min
|
|
||||||
session_start_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].min().to_frame().rename(columns={'timestamp': 'session_start_timestamp'}) # questionnaire start timestamp
|
|
||||||
session_end_timestamp = esm_df.groupby(['device_id', 'esm_session'])['timestamp'].max().to_frame().rename(columns={'timestamp': 'session_end_timestamp'}) # questionnaire end timestamp
|
|
||||||
|
|
||||||
# Users' answers for the stressfulness event (se) start times and durations
|
|
||||||
se_time = esm_df[esm_df.questionnaire_id == 90.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_time'})
|
|
||||||
se_duration = esm_df[esm_df.questionnaire_id == 91.].set_index(['device_id', 'esm_session'])['esm_user_answer'].to_frame().rename(columns={'esm_user_answer': 'se_duration'})
|
|
||||||
|
|
||||||
# Make se_durations to the appropriate lengths
|
|
||||||
|
|
||||||
# Extract the 3 targets that will be transferred via the CSV file to the cleaning script.
|
|
||||||
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
|
|
||||||
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
|
|
||||||
se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
|
|
||||||
|
|
||||||
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
|
|
||||||
extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
|
||||||
.join(session_end_timestamp, on=['device_id', 'esm_session'], how='inner') \
|
|
||||||
.join(se_stressfulness_event_tg, on=['device_id', 'esm_session'], how='inner') \
|
|
||||||
.join(se_time, on=['device_id', 'esm_session'], how='left') \
|
|
||||||
.join(se_duration, on=['device_id', 'esm_session'], how='left') \
|
|
||||||
.join(se_threat_tg, on=['device_id', 'esm_session'], how='left') \
|
|
||||||
.join(se_challenge_tg, on=['device_id', 'esm_session'], how='left')
|
|
||||||
|
|
||||||
# Filter-out the sessions that are not useful. Because of the ambiguity this excludes:
|
|
||||||
# (1) straw event times that are marked as "0 - I don't remember"
|
|
||||||
extracted_ers = extracted_ers[~extracted_ers.se_time.astype(str).str.startswith("0 - ")]
|
|
||||||
extracted_ers.reset_index(drop=True, inplace=True)
|
|
||||||
|
|
||||||
extracted_ers.loc[extracted_ers.se_duration.astype(str).str.startswith("0 - "), 'se_duration'] = 0
|
|
||||||
|
|
||||||
# Add a default duration in case the participant answered that no stressful event occurred
|
|
||||||
extracted_ers["se_duration"] = extracted_ers["se_duration"].fillna(int((ioi + 2*ioi_error_tolerance) * 1000))
|
|
||||||
|
|
||||||
# Prepare data to fit the data structure in the CSV file ...
|
|
||||||
# Use the start of the questionnaire as the event time if no stress event occurred
|
|
||||||
extracted_ers['se_time'] = extracted_ers['se_time'].fillna(extracted_ers['session_start_timestamp'])
|
|
||||||
# The value can be an int (timestamp [ms]), which stays the same, or a datetime str, which is converted to a timestamp in milliseconds
|
|
||||||
extracted_ers['event_timestamp'] = extracted_ers['se_time'].apply(lambda x: x if isinstance(x, int) else pd.to_datetime(x).timestamp() * 1000).astype('int64')
|
|
||||||
extracted_ers['shift_direction'] = -1
|
|
||||||
|
|
||||||
""">>>>> begin section (could be optimized) <<<<<"""
|
|
||||||
|
|
||||||
# Checks whether the duration is marked with "1 - It's still ongoing" which means that the end of the current questionnaire
|
|
||||||
# is taken as end time of the segment. Else the user input duration is taken.
|
|
||||||
extracted_ers['se_duration'] = \
|
|
||||||
np.where(
|
|
||||||
extracted_ers['se_duration'].astype(str).str.startswith("1 - "),
|
|
||||||
extracted_ers['session_end_timestamp'] - extracted_ers['event_timestamp'],
|
|
||||||
extracted_ers['se_duration']
|
|
||||||
)
|
|
||||||
|
|
||||||
# This converts both the rows holding milliseconds (int) and the rows holding datetime strings to a duration in seconds.
|
|
||||||
extracted_ers['se_duration'] = \
|
|
||||||
extracted_ers['se_duration'].apply(lambda x: math.ceil(x / 1000) if isinstance(x, int) else (pd.to_datetime(x).hour * 60 + pd.to_datetime(x).minute) * 60)
|
|
||||||
|
|
||||||
# Explicitly check that the event starts no later than the end of the questionnaire. This eliminates rows whose segment would start after the questionnaire ended.
|
|
||||||
extracted_ers = extracted_ers[extracted_ers['session_end_timestamp'] - extracted_ers['event_timestamp'] >= 0]
|
|
||||||
# Double check whether min se_duration is at least 0. Filter-out the rest. Negative values are considered invalid.
|
|
||||||
extracted_ers = extracted_ers[extracted_ers["se_duration"] >= 0].reset_index(drop=True)
|
|
||||||
|
|
||||||
""">>>>> end section <<<<<"""
|
|
||||||
|
|
||||||
# Simply override all durations to be of an equal amount
|
|
||||||
extracted_ers['se_duration'] = ioi + 2*ioi_error_tolerance
|
|
||||||
|
|
||||||
# If target is 0 then shift by the total stress event duration, otherwise shift it by ioi_tolerance
|
|
||||||
extracted_ers['shift'] = \
|
|
||||||
np.where(
|
|
||||||
extracted_ers['appraisal_stressfulness_event'] == 0,
|
|
||||||
extracted_ers['se_duration'],
|
|
||||||
ioi_error_tolerance
|
|
||||||
)
|
|
||||||
|
|
||||||
extracted_ers['shift'] = extracted_ers['shift'].apply(lambda x: format_timestamp(int(x)))
|
|
||||||
extracted_ers['length'] = extracted_ers['se_duration'].apply(lambda x: format_timestamp(int(x)))
|
|
||||||
|
|
||||||
# Drop event_timestamp duplicates in case the user is referencing the same event over multiple questionnaires
|
|
||||||
extracted_ers.drop_duplicates(subset=["event_timestamp"], keep='first', inplace=True)
|
|
||||||
extracted_ers.reset_index(drop=True, inplace=True)
|
|
||||||
|
|
||||||
extracted_ers["label"] = f"straw_event_{segmenting_method}_" + snakemake.params["pid"] + "_" + extracted_ers.index.astype(str).str.zfill(3)
|
|
||||||
|
|
||||||
# Write the csv of extracted ERS labels with targets related to stressfulness event
|
|
||||||
extracted_ers[["label", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]].to_csv(snakemake.output[1], index=False)
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise Exception("Please select correct target method for the event-related segments.")
|
|
||||||
extracted_ers = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"])
|
|
||||||
|
|
||||||
return extracted_ers[["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]]
|
return extracted_ers[["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"]]
|
||||||
|
|
||||||
|
# TODO: need to check how the lookup via device_id is performed -> the ERS extraction process is also based on it
|
||||||
|
|
||||||
"""
|
if snakemake.params["stage"] == "extract": # TODO: first check that this is placed appropriately in the RAPIDS pipeline
|
||||||
Here the code is executed - this .py file is used both for extraction of the STRAW time_segments file for the individual
|
|
||||||
participant, and also for merging all participant's files into one combined file which is later used for the time segments
|
|
||||||
to all sensors assignment.
|
|
||||||
|
|
||||||
There are two files involved (see rules extract_event_information_from_esm and merge_event_related_segments_files in preprocessing.smk)
|
|
||||||
(1) ERS file which contains all the information about the time segment timings and
|
|
||||||
(2) targets file which has corresponding target value for the segment label which is later used to merge with other features in the cleaning script.
|
|
||||||
For more information, see the comment in the method above.
|
|
||||||
"""
|
|
||||||
if snakemake.params["stage"] == "extract":
|
|
||||||
esm_df = pd.read_csv(input_data_files['esm_raw_input'])
|
esm_df = pd.read_csv(input_data_files['esm_raw_input'])
|
||||||
|
|
||||||
extracted_ers = extract_ers(esm_df)
|
with open(input_data_files['pid_file'], 'r') as stream:
|
||||||
|
pid_file = yaml.load(stream, Loader=yaml.FullLoader)
|
||||||
|
|
||||||
|
extracted_ers = extract_ers_from_file(esm_df, pid_file["PHONE"]["DEVICE_IDS"][0])
|
||||||
|
|
||||||
extracted_ers.to_csv(snakemake.output[0], index=False)
|
extracted_ers.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
||||||
elif snakemake.params["stage"] == "merge":
|
elif snakemake.params["stage"] == "merge":
|
||||||
|
|
||||||
input_data_files = dict(snakemake.input)
|
input_data_files = dict(snakemake.input)
|
||||||
straw_events = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"])
|
straw_events = pd.DataFrame(columns=["label", "event_timestamp", "length", "shift", "shift_direction", "device_id"])
|
||||||
stress_events_targets = pd.DataFrame(columns=["label", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"])
|
|
||||||
|
|
||||||
for input_file in input_data_files["ers_files"]:
|
for input_file in input_data_files["ers_files"]:
|
||||||
ers_df = pd.read_csv(input_file)
|
ers_df = pd.read_csv(input_file)
|
||||||
|
@ -250,11 +84,3 @@ elif snakemake.params["stage"] == "merge":
|
||||||
|
|
||||||
straw_events.to_csv(snakemake.output[0], index=False)
|
straw_events.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
||||||
for input_file in input_data_files["se_files"]:
|
|
||||||
se_df = pd.read_csv(input_file)
|
|
||||||
stress_events_targets = pd.concat([stress_events_targets, se_df], axis=0, ignore_index=True)
|
|
||||||
|
|
||||||
stress_events_targets.to_csv(snakemake.output[1], index=False)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -115,7 +115,7 @@ cluster_on = provider["CLUSTER_ON"]
|
||||||
strategy = provider["INFER_HOME_LOCATION_STRATEGY"]
|
strategy = provider["INFER_HOME_LOCATION_STRATEGY"]
|
||||||
days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
|
days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
|
||||||
|
|
||||||
if not location_data.timestamp.is_monotonic_increasing:
|
if not location_data.timestamp.is_monotonic:
|
||||||
location_data.sort_values(by=["timestamp"], inplace=True)
|
location_data.sort_values(by=["timestamp"], inplace=True)
|
||||||
|
|
||||||
location_data["duration_in_seconds"] = -1 * location_data.timestamp.diff(-1) / 1000
|
location_data["duration_in_seconds"] = -1 * location_data.timestamp.diff(-1) / 1000
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
|
||||||
def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
|
|
||||||
speech_data = pd.read_csv(sensor_data_files["sensor_data"])
|
|
||||||
requested_features = provider["FEATURES"]
|
|
||||||
# names of the features this function can compute
|
|
||||||
base_features_names = ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
|
|
||||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
|
||||||
speech_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
|
|
||||||
|
|
||||||
if not speech_data.empty:
|
|
||||||
speech_data = filter_data_by_segment(speech_data, time_segment)
|
|
||||||
|
|
||||||
if not speech_data.empty:
|
|
||||||
speech_features = pd.DataFrame()
|
|
||||||
if "meanspeech" in features_to_compute:
|
|
||||||
speech_features["meanspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].mean()
|
|
||||||
if "stdspeech" in features_to_compute:
|
|
||||||
speech_features["stdspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].std()
|
|
||||||
if "nlargest" in features_to_compute:
|
|
||||||
speech_features["nlargest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nlargest(5).mean())
|
|
||||||
if "nsmallest" in features_to_compute:
|
|
||||||
speech_features["nsmallest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nsmallest(5).mean())
|
|
||||||
if "medianspeech" in features_to_compute:
|
|
||||||
speech_features["medianspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].median()
|
|
||||||
|
|
||||||
speech_features = speech_features.reset_index()
|
|
||||||
|
|
||||||
return speech_features
|
|
|
@ -1,6 +1,5 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import sys
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
||||||
column_names = df_input.columns
|
column_names = df_input.columns
|
||||||
|
@ -9,9 +8,9 @@ def retain_target_column(df_input: pd.DataFrame, target_variable_name: str):
|
||||||
esm_names = column_names[esm_names_index]
|
esm_names = column_names[esm_names_index]
|
||||||
target_variable_index = esm_names.str.contains(target_variable_name)
|
target_variable_index = esm_names.str.contains(target_variable_name)
|
||||||
if all(~target_variable_index):
|
if all(~target_variable_index):
|
||||||
warnings.warn(f"The requested target (, {target_variable_name} ,)cannot be found in the dataset. Please check the names of phone_esm_ columns in cleaned python file")
|
raise ValueError("The requested target (", target_variable_name,
|
||||||
return None
|
")cannot be found in the dataset.",
|
||||||
|
"Please check the names of phone_esm_ columns in z_all_sensor_features_cleaned_straw_py.csv")
|
||||||
sensor_features_plus_target = df_input.drop(esm_names, axis=1)
|
sensor_features_plus_target = df_input.drop(esm_names, axis=1)
|
||||||
sensor_features_plus_target["target"] = df_input[esm_names[target_variable_index]]
|
sensor_features_plus_target["target"] = df_input[esm_names[target_variable_index]]
|
||||||
# We will only keep one column related to phone_esm and that will be our target variable.
|
# We will only keep one column related to phone_esm and that will be our target variable.
|
||||||
|
|
|
@ -7,7 +7,4 @@ target_variable_name = snakemake.params["target_variable"]
|
||||||
|
|
||||||
model_input = retain_target_column(cleaned_sensor_features, target_variable_name)
|
model_input = retain_target_column(cleaned_sensor_features, target_variable_name)
|
||||||
|
|
||||||
if model_input is None:
|
model_input.to_csv(snakemake.output[0], index=False)
|
||||||
pd.DataFrame().to_csv(snakemake.output[0])
|
|
||||||
else:
|
|
||||||
model_input.to_csv(snakemake.output[0], index=False)
|
|
||||||
|
|
Loading…
Reference in New Issue