Compare commits

...

24 Commits

Author SHA1 Message Date
junos 63f5a526fc Bring back requested fields in config.yaml.
Update coding files based on 7e565c34db98265afcda922a337493781fdd8ed5 in supermodule.
2023-04-19 11:07:58 +02:00
junos 1cc7339fc8 Completely remove PACKAGE_NAMES_HASHED and instead provide a differently structured file. 2023-04-18 22:58:42 +02:00
junos 5307c71df0 Missing comma. 2023-04-18 22:45:12 +02:00
junos f261286542 Add package_names_hashed param for rule phone_application_categories. 2023-04-18 22:40:11 +02:00
junos a6bc0a90d1 Do not ignore application categories. 2023-04-18 21:34:59 +02:00
junos f161da41f4 Merge branch 'master' into runner 2023-04-18 21:23:26 +02:00
junos 8ffd934fd3 Categorize applications in config.yaml. 2023-04-18 20:39:57 +02:00
junos cf6af7c9a4 Add a TODO. 2023-04-18 16:11:30 +02:00
junos 4dacb7129d Change targets for 30 before.
Further increase resources for acc.
2023-04-18 10:47:54 +02:00
junos f542a97eab Change targets for 90 before. 2023-04-15 16:29:06 +02:00
junos 5cb2dcfb00 Run 90 before event. 2023-04-15 16:18:55 +02:00
junos 8cef60ba87 Limit memory usage by readable_datetime.
Especially important for accelerometer data.
2023-04-14 16:01:44 +02:00
junos 0d634f3622 Remove deprecated numpy dtype. 2023-04-14 13:43:20 +02:00
junos 00e4f8deae More numeric_only arguments.
See 1d903f3629 for explanation.
2023-04-13 13:04:53 +02:00
junos 03687a1ac2 Fix deprecated attribute. 2023-04-12 18:21:43 +02:00
junos a36da99ccb Catch another possible exception. 2023-04-12 16:37:25 +02:00
junos 1d903f3629 Specify numeric_only for pandas.core.groupby.DataFrameGroupBy.mean.
This parameter used to be None by default, but this usage is deprecated since pandas 2.0.
See [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.groupby.DataFrameGroupBy.mean.html):

> Changed in version 2.0.0: numeric_only no longer accepts None and defaults to False.
2023-04-12 16:05:58 +02:00
junos d678be0641 Extract definition of function. 2023-04-12 16:00:13 +02:00
junos 27b90421bf Add a missing pip dependency. 2023-04-11 19:06:29 +02:00
junos cb006ed0cf Completely overhaul environment.yml. 2023-04-11 19:04:28 +02:00
junos 9ca58ed204 Fix Python libraries. 2023-04-11 17:21:03 +02:00
junos 982fa982f7 Remove Python libraries versions. 2023-04-11 17:16:42 +02:00
junos f8088172e9 Update more R packages. 2023-04-11 15:31:44 +02:00
junos 801fbe1c10 Update R packages. 2023-04-11 15:26:21 +02:00
15 changed files with 1368 additions and 327 deletions

3
.gitignore vendored
View File

@ -100,6 +100,9 @@ data/external/*
!/data/external/wiki_tz.csv !/data/external/wiki_tz.csv
!/data/external/main_study_usernames.csv !/data/external/main_study_usernames.csv
!/data/external/timezone.csv !/data/external/timezone.csv
!/data/external/play_store_application_genre_catalogue.csv
!/data/external/play_store_categories_count.csv
data/raw/* data/raw/*
!/data/raw/.gitkeep !/data/raw/.gitkeep

View File

@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
TAILORED_EVENTS: # Only relevant if TYPE=EVENT TAILORED_EVENTS: # Only relevant if TYPE=EVENT
COMPUTE: True COMPUTE: True
SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event SEGMENTING_METHOD: "30_before" # 30_before, 90_before, stress_event
INTERVAL_OF_INTEREST: 10 # duration of event of interest [minutes] INTERVAL_OF_INTEREST: 10 # duration of event of interest [minutes]
IOI_ERROR_TOLERANCE: 5 # interval of interest error tolerance (before and after IOI) [minutes] IOI_ERROR_TOLERANCE: 5 # interval of interest error tolerance (before and after IOI) [minutes]
@ -104,9 +104,9 @@ PHONE_APPLICATIONS_CRASHES:
CONTAINER: applications_crashes CONTAINER: applications_crashes
APPLICATION_CATEGORIES: APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store) CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" CATALOGUE_FILE: "data/external/play_store_application_genre_catalogue.csv"
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway SCRAPE_MISSING_CATEGORIES: False # whether to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD PROVIDERS: # None implemented yet but this sensor can be used in PHONE_DATA_YIELD
# See https://www.rapids.science/latest/features/phone-applications-foreground/ # See https://www.rapids.science/latest/features/phone-applications-foreground/
@ -114,24 +114,32 @@ PHONE_APPLICATIONS_FOREGROUND:
CONTAINER: applications CONTAINER: applications
APPLICATION_CATEGORIES: APPLICATION_CATEGORIES:
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store) CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv" CATALOGUE_FILE: "data/external/play_store_application_genre_catalogue.csv"
PACKAGE_NAMES_HASHED: True # Refer to data/external/play_store_categories_count.csv for a list of categories (genres) and their frequency.
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway SCRAPE_MISSING_CATEGORIES: False # whether to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: True COMPUTE: True
INCLUDE_EPISODE_FEATURES: True INCLUDE_EPISODE_FEATURES: True
SINGLE_CATEGORIES: ["all", "email"] SINGLE_CATEGORIES: ["Productivity", "Tools", "Communication", "Education", "Social"]
MULTIPLE_CATEGORIES: MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"] games: ["Puzzle", "Card", "Casual", "Board", "Strategy", "Trivia", "Word", "Adventure", "Role Playing", "Simulation", "Board, Brain Games", "Racing"]
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] social: ["Communication", "Social", "Dating"]
productivity: ["Tools", "Productivity", "Finance", "Education", "News & Magazines", "Business", "Books & Reference"]
health: ["Health & Fitness", "Lifestyle", "Food & Drink", "Sports", "Medical", "Parenting"]
entertainment: ["Shopping", "Music & Audio", "Entertainment", "Travel & Local", "Photography", "Video Players & Editors", "Personalization", "House & Home", "Art & Design", "Auto & Vehicles", "Entertainment,Music & Video",
"Puzzle", "Card", "Casual", "Board", "Strategy", "Trivia", "Word", "Adventure", "Role Playing", "Simulation", "Board, Brain Games", "Racing" # Add all games.
]
maps_weather: ["Maps & Navigation", "Weather"]
CUSTOM_CATEGORIES: CUSTOM_CATEGORIES:
social_media: ["com.google.android.youtube", "com.snapchat.android", "com.instagram.android", "com.zhiliaoapp.musically", "com.facebook.katana"] SINGLE_APPS: []
dating: ["com.tinder", "com.relance.happycouple", "com.kiwi.joyride"] EXCLUDED_CATEGORIES: ["System", "STRAW"]
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps # Note: A special option here is "is_system_app".
EXCLUDED_CATEGORIES: [] # This excludes applications that have is_system_app = TRUE, which is a separate column in the table.
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] # TODO list system apps? # However, all of these applications have been assigned System category.
# I will therefore filter by that category, which is a superset and is more complete. JL
EXCLUDED_APPS: []
FEATURES: FEATURES:
APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] APP_EVENTS: ["countevent", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"] APP_EPISODES: ["countepisode", "minduration", "maxduration", "meanduration", "sumduration"]
@ -745,6 +753,6 @@ PARAMS_FOR_ANALYSIS:
TARGET: TARGET:
COMPUTE: True COMPUTE: True
LABEL: appraisal_stressfulness_event_mean LABEL: appraisal_stressfulness_event_mean
ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean] ALL_LABELS: [PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean, JCQ_coworker_support_mean, appraisal_stressfulness_period_mean]
# PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean, # PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean,
# JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean # JCQ_coworker_support_mean, appraisal_stressfulness_period_mean, appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,45 @@
genre,n
System,261
Tools,96
Productivity,71
Health & Fitness,60
Finance,54
Communication,39
Music & Audio,39
Shopping,38
Lifestyle,33
Education,28
News & Magazines,24
Maps & Navigation,23
Entertainment,21
Business,18
Travel & Local,18
Books & Reference,16
Social,16
Weather,16
Food & Drink,14
Sports,14
Other,13
Photography,13
Puzzle,13
Video Players & Editors,12
Card,9
Casual,9
Personalization,8
Medical,7
Board,5
Strategy,4
House & Home,3
Trivia,3
Word,3
Adventure,2
Art & Design,2
Auto & Vehicles,2
Dating,2
Role Playing,2
STRAW,2
Simulation,2
"Board,Brain Games",1
"Entertainment,Music & Video",1
Parenting,1
Racing,1
1 genre n
2 System 261
3 Tools 96
4 Productivity 71
5 Health & Fitness 60
6 Finance 54
7 Communication 39
8 Music & Audio 39
9 Shopping 38
10 Lifestyle 33
11 Education 28
12 News & Magazines 24
13 Maps & Navigation 23
14 Entertainment 21
15 Business 18
16 Travel & Local 18
17 Books & Reference 16
18 Social 16
19 Weather 16
20 Food & Drink 14
21 Sports 14
22 Other 13
23 Photography 13
24 Puzzle 13
25 Video Players & Editors 12
26 Card 9
27 Casual 9
28 Personalization 8
29 Medical 7
30 Board 5
31 Strategy 4
32 House & Home 3
33 Trivia 3
34 Word 3
35 Adventure 2
36 Art & Design 2
37 Auto & Vehicles 2
38 Dating 2
39 Role Playing 2
40 STRAW 2
41 Simulation 2
42 Board,Brain Games 1
43 Entertainment,Music & Video 1
44 Parenting 1
45 Racing 1

View File

@ -1,165 +1,30 @@
name: rapids name: rapids
channels: channels:
- conda-forge - conda-forge
- defaults
dependencies: dependencies:
- _libgcc_mutex=0.1 - auto-sklearn
- _openmp_mutex=4.5 - hmmlearn
- _py-xgboost-mutex=2.0 - imbalanced-learn
- appdirs=1.4.4 - jsonschema
- arrow=0.16.0 - lightgbm
- asn1crypto=1.4.0 - matplotlib
- astropy=4.2.1 - numpy
- attrs=20.3.0 - pandas
- binaryornot=0.4.4 - peakutils
- blas=1.0 - pip
- brotlipy=0.7.0 - plotly
- bzip2=1.0.8 - python-dateutil
- ca-certificates=2021.7.5 - pytz
- certifi=2021.5.30 - pywavelets
- cffi=1.14.4 - pyyaml
- chardet=3.0.4 - scikit-learn
- click=7.1.2 - scipy
- colorama=0.4.4 - seaborn
- cookiecutter=1.6.0 - setuptools
- cryptography=3.3.1 - bioconda::snakemake
- datrie=0.8.2 - bioconda::snakemake-minimal
- docutils=0.16 - tqdm
- future=0.18.2 - xgboost
- gitdb=4.0.5 - pip:
- gitdb2=4.0.2 - biosppy
- gitpython=3.1.11 - cr_features>=0.2
- idna=2.10
- imbalanced-learn=0.6.2
- importlib-metadata=2.0.0
- importlib_metadata=2.0.0
- intel-openmp=2019.4
- jinja2=2.11.2
- jinja2-time=0.2.0
- joblib=1.0.0
- jsonschema=3.2.0
- ld_impl_linux-64=2.36.1
- libblas=3.8.0
- libcblas=3.8.0
- libcxx=10.0.0
- libcxxabi=10.0.0
- libedit=3.1.20191231
- libffi=3.3
- libgcc-ng=11.2.0
- libgfortran
- libgfortran
- libgfortran
- liblapack=3.8.0
- libopenblas=0.3.10
- libstdcxx-ng=11.2.0
- libxgboost=0.90
- libzlib=1.2.11
- lightgbm=3.1.1
- llvm-openmp=10.0.0
- markupsafe=1.1.1
- mkl
- mkl-service=2.3.0
- mkl_fft=1.2.0
- mkl_random=1.1.1
- more-itertools=8.6.0
- ncurses=6.2
- numpy=1.19.2
- numpy-base=1.19.2
- openblas=0.3.4
- openssl=1.1.1k
- pandas=1.1.5
- pbr=5.5.1
- pip=20.3.3
- plotly=4.14.1
- poyo=0.5.0
- psutil=5.7.2
- py-xgboost=0.90
- pycparser=2.20
- pyerfa=1.7.1.1
- pyopenssl=20.0.1
- pysocks=1.7.1
- python=3.7.9
- python-dateutil=2.8.1
- python_abi=3.7
- pytz=2020.4
- pyyaml=5.3.1
- readline=8.0
- requests=2.25.0
- retrying=1.3.3
- setuptools=51.0.0
- six=1.15.0
- smmap=3.0.4
- smmap2=3.0.1
- sqlite=3.33.0
- threadpoolctl=2.1.0
- tk=8.6.10
- tqdm=4.62.0
- urllib3=1.25.11
- wheel=0.36.2
- whichcraft=0.6.1
- wrapt=1.12.1
- xgboost=0.90
- xz=5.2.5
- yaml=0.2.5
- zipp=3.4.0
- zlib=1.2.11
- pip:
- amply==0.1.4
- auto-sklearn==0.14.7
- bidict==0.22.0
- biosppy==0.8.0
- build==0.8.0
- cached-property==1.5.2
- cloudpickle==2.2.0
- configargparse==0.15.1
- configspace==0.4.21
- cr-features==0.2.1
- cycler==0.11.0
- cython==0.29.32
- dask==2022.2.0
- decorator==4.4.2
- distributed==2022.2.0
- distro==1.7.0
- emcee==3.1.2
- fonttools==4.33.2
- fsspec==2022.8.2
- h5py==3.6.0
- heapdict==1.0.1
- hmmlearn==0.2.7
- ipython-genutils==0.2.0
- jupyter-core==4.6.3
- kiwisolver==1.4.2
- liac-arff==2.5.0
- locket==1.0.0
- matplotlib==3.5.1
- msgpack==1.0.4
- nbformat==5.0.7
- opencv-python==4.5.5.64
- packaging==21.3
- partd==1.3.0
- peakutils==1.3.3
- pep517==0.13.0
- pillow==9.1.0
- pulp==2.4
- pynisher==0.6.4
- pyparsing==2.4.7
- pyrfr==0.8.3
- pyrsistent==0.15.5
- pywavelets==1.3.0
- ratelimiter==1.2.0.post0
- scikit-learn==0.24.2
- scipy==1.7.3
- seaborn==0.11.2
- shortuuid==1.0.8
- smac==1.2
- snakemake==5.30.2
- sortedcontainers==2.4.0
- tblib==1.7.0
- tomli==2.0.1
- toolz==0.12.0
- toposort==1.5
- tornado==6.2
- traitlets==4.3.3
- typing-extensions==4.2.0
- zict==2.2.0
prefix: /opt/conda/envs/rapids

338
renv.lock
View File

@ -1,6 +1,6 @@
{ {
"R": { "R": {
"Version": "4.1.2", "Version": "4.2.3",
"Repositories": [ "Repositories": [
{ {
"Name": "CRAN", "Name": "CRAN",
@ -46,10 +46,10 @@
}, },
"Hmisc": { "Hmisc": {
"Package": "Hmisc", "Package": "Hmisc",
"Version": "4.4-2", "Version": "5.0-1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "66458e906b2112a8b1639964efd77d7c" "Hash": "bf9fe82c010a468fb32f913ff56d65e1"
}, },
"KernSmooth": { "KernSmooth": {
"Package": "KernSmooth", "Package": "KernSmooth",
@ -104,7 +104,7 @@
"Package": "RPostgres", "Package": "RPostgres",
"Version": "1.4.4", "Version": "1.4.4",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "c593ecb8dbca9faf3906431be610ca28" "Hash": "c593ecb8dbca9faf3906431be610ca28"
}, },
"Rcpp": { "Rcpp": {
@ -181,7 +181,7 @@
"Package": "base64enc", "Package": "base64enc",
"Version": "0.1-3", "Version": "0.1-3",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "543776ae6848fde2f48ff3816d0628bc" "Hash": "543776ae6848fde2f48ff3816d0628bc"
}, },
"bit": { "bit": {
@ -221,17 +221,24 @@
}, },
"broom": { "broom": {
"Package": "broom", "Package": "broom",
"Version": "0.7.3", "Version": "1.0.4",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "5581a5ddc8fe2ac5e0d092ec2de4c4ae" "Hash": "f62b2504021369a2449c54bbda362d30"
},
"cachem": {
"Package": "cachem",
"Version": "1.0.7",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "cda74447c42f529de601fe4d4050daef"
}, },
"callr": { "callr": {
"Package": "callr", "Package": "callr",
"Version": "3.5.1", "Version": "3.7.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "b7d7f1e926dfcd57c74ce93f5c048e80" "Hash": "9b2191ede20fa29828139b9900922e51"
}, },
"caret": { "caret": {
"Package": "caret", "Package": "caret",
@ -263,10 +270,10 @@
}, },
"cli": { "cli": {
"Package": "cli", "Package": "cli",
"Version": "2.2.0", "Version": "3.6.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "3ef298932294b775fa0a3eeaa3a645b0" "Hash": "89e6d8219950eac806ae0c489052048a"
}, },
"clipr": { "clipr": {
"Package": "clipr", "Package": "clipr",
@ -286,7 +293,7 @@
"Package": "codetools", "Package": "codetools",
"Version": "0.2-18", "Version": "0.2-18",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "019388fc48e48b3da0d3a76ff94608a8" "Hash": "019388fc48e48b3da0d3a76ff94608a8"
}, },
"colorspace": { "colorspace": {
@ -303,6 +310,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "0f22be39ec1d141fd03683c06f3a6e67" "Hash": "0f22be39ec1d141fd03683c06f3a6e67"
}, },
"conflicted": {
"Package": "conflicted",
"Version": "1.2.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "bb097fccb22d156624fd07cd2894ddb6"
},
"corpcor": { "corpcor": {
"Package": "corpcor", "Package": "corpcor",
"Version": "1.6.9", "Version": "1.6.9",
@ -319,10 +333,10 @@
}, },
"cpp11": { "cpp11": {
"Package": "cpp11", "Package": "cpp11",
"Version": "0.2.4", "Version": "0.4.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "ba66e5a750d39067d888aa7af797fed2" "Hash": "ed588261931ee3be2c700d22e94a29ab"
}, },
"crayon": { "crayon": {
"Package": "crayon", "Package": "crayon",
@ -354,10 +368,10 @@
}, },
"dbplyr": { "dbplyr": {
"Package": "dbplyr", "Package": "dbplyr",
"Version": "2.1.1", "Version": "2.3.2",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Hash": "1f37fa4ab2f5f7eded42f78b9a887182" "Hash": "d24305b92db333726aed162a2c23a147"
}, },
"desc": { "desc": {
"Package": "desc", "Package": "desc",
@ -382,17 +396,17 @@
}, },
"dplyr": { "dplyr": {
"Package": "dplyr", "Package": "dplyr",
"Version": "1.0.5", "Version": "1.1.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "d0d76c11ec807eb3f000eba4e3eb0f68" "Hash": "eb5742d256a0d9306d85ea68756d8187"
}, },
"dtplyr": { "dtplyr": {
"Package": "dtplyr", "Package": "dtplyr",
"Version": "1.1.0", "Version": "1.3.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "1e14e4c5b2814de5225312394bc316da" "Hash": "54ed3ea01b11e81a86544faaecfef8e2"
}, },
"e1071": { "e1071": {
"Package": "e1071", "Package": "e1071",
@ -419,7 +433,7 @@
"Package": "evaluate", "Package": "evaluate",
"Version": "0.14", "Version": "0.14",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "ec8ca05cffcc70569eaaad8469d2a3a7" "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7"
}, },
"fansi": { "fansi": {
@ -452,10 +466,10 @@
}, },
"forcats": { "forcats": {
"Package": "forcats", "Package": "forcats",
"Version": "0.5.0", "Version": "1.0.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "1cb4279e697650f0bd78cd3601ee7576" "Hash": "1a0a9a3d5083d0d573c4214576f1e690"
}, },
"foreach": { "foreach": {
"Package": "foreach", "Package": "foreach",
@ -492,6 +506,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "f568ce73d3d59582b0f7babd0eb33d07" "Hash": "f568ce73d3d59582b0f7babd0eb33d07"
}, },
"gargle": {
"Package": "gargle",
"Version": "1.3.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "bb3208dcdfeb2e68bf33c87601b3cbe3"
},
"gclus": { "gclus": {
"Package": "gclus", "Package": "gclus",
"Version": "1.3.2", "Version": "1.3.2",
@ -515,10 +536,10 @@
}, },
"ggplot2": { "ggplot2": {
"Package": "ggplot2", "Package": "ggplot2",
"Version": "3.3.2", "Version": "3.4.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "4ded8b439797f7b1693bd3d238d0106b" "Hash": "d494daf77c4aa7f084dbbe6ca5dcaca7"
}, },
"ggraph": { "ggraph": {
"Package": "ggraph", "Package": "ggraph",
@ -557,16 +578,30 @@
}, },
"glue": { "glue": {
"Package": "glue", "Package": "glue",
"Version": "1.4.2", "Version": "1.6.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "6efd734b14c6471cfe443345f3e35e29" "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e"
},
"googledrive": {
"Package": "googledrive",
"Version": "2.1.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "e88ba642951bc8d1898ba0d12581850b"
},
"googlesheets4": {
"Package": "googlesheets4",
"Version": "1.1.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "fd7b97bd862a14297b0bb7ed28a3dada"
}, },
"gower": { "gower": {
"Package": "gower", "Package": "gower",
"Version": "0.2.2", "Version": "0.2.2",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "be6a2b3529928bd803d1c437d1d43152" "Hash": "be6a2b3529928bd803d1c437d1d43152"
}, },
"graphlayouts": { "graphlayouts": {
@ -599,10 +634,10 @@
}, },
"haven": { "haven": {
"Package": "haven", "Package": "haven",
"Version": "2.3.1", "Version": "2.5.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "221d0ad75dfa03ebf17b1a4cc5c31dfc" "Hash": "8b331e659e67d757db0fcc28e689c501"
}, },
"highr": { "highr": {
"Package": "highr", "Package": "highr",
@ -613,10 +648,10 @@
}, },
"hms": { "hms": {
"Package": "hms", "Package": "hms",
"Version": "1.1.1", "Version": "1.1.3",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" "Hash": "b59377caa7ed00fa41808342002138f9"
}, },
"htmlTable": { "htmlTable": {
"Package": "htmlTable", "Package": "htmlTable",
@ -648,10 +683,10 @@
}, },
"httr": { "httr": {
"Package": "httr", "Package": "httr",
"Version": "1.4.2", "Version": "1.4.5",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "a525aba14184fec243f9eaec62fbed43" "Hash": "f6844033201269bec3ca0097bc6c97b3"
}, },
"huge": { "huge": {
"Package": "huge", "Package": "huge",
@ -660,6 +695,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "a4cde4dd1d2551edb99a3273a4ad34ea" "Hash": "a4cde4dd1d2551edb99a3273a4ad34ea"
}, },
"ids": {
"Package": "ids",
"Version": "1.0.1",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "99df65cfef20e525ed38c3d2577f7190"
},
"igraph": { "igraph": {
"Package": "igraph", "Package": "igraph",
"Version": "1.2.6", "Version": "1.2.6",
@ -704,10 +746,10 @@
}, },
"jsonlite": { "jsonlite": {
"Package": "jsonlite", "Package": "jsonlite",
"Version": "1.7.2", "Version": "1.8.4",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "98138e0994d41508c7a6b84a0600cfcb" "Hash": "a4269a09a9b865579b2635c77e572374"
}, },
"knitr": { "knitr": {
"Package": "knitr", "Package": "knitr",
@ -760,10 +802,10 @@
}, },
"lifecycle": { "lifecycle": {
"Package": "lifecycle", "Package": "lifecycle",
"Version": "1.0.0", "Version": "1.0.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "3471fb65971f1a7b2d4ae7848cf2db8d" "Hash": "001cecbeac1cff9301bdc3775ee46a86"
}, },
"listenv": { "listenv": {
"Package": "listenv", "Package": "listenv",
@ -774,17 +816,17 @@
}, },
"lubridate": { "lubridate": {
"Package": "lubridate", "Package": "lubridate",
"Version": "1.7.9.2", "Version": "1.9.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "5b5b02f621d39a499def7923a5aee746" "Hash": "e25f18436e3efd42c7c590a1c4c15390"
}, },
"magrittr": { "magrittr": {
"Package": "magrittr", "Package": "magrittr",
"Version": "2.0.1", "Version": "2.0.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "41287f1ac7d28a92f0a286ed507928d3" "Hash": "7ce2733a9826b3aeb1775d56fd305472"
}, },
"markdown": { "markdown": {
"Package": "markdown", "Package": "markdown",
@ -800,6 +842,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "67101e7448dfd9add4ac418623060262" "Hash": "67101e7448dfd9add4ac418623060262"
}, },
"memoise": {
"Package": "memoise",
"Version": "2.0.1",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c"
},
"mgcv": { "mgcv": {
"Package": "mgcv", "Package": "mgcv",
"Version": "1.8-33", "Version": "1.8-33",
@ -830,10 +879,10 @@
}, },
"modelr": { "modelr": {
"Package": "modelr", "Package": "modelr",
"Version": "0.1.8", "Version": "0.1.11",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "9fd59716311ee82cba83dc2826fc5577" "Hash": "4f50122dc256b1b6996a4703fecea821"
}, },
"munsell": { "munsell": {
"Package": "munsell", "Package": "munsell",
@ -888,7 +937,7 @@
"Package": "parallelly", "Package": "parallelly",
"Version": "1.29.0", "Version": "1.29.0",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "b5f399c9ce96977e22ef32c20b6cfe87" "Hash": "b5f399c9ce96977e22ef32c20b6cfe87"
}, },
"pbapply": { "pbapply": {
@ -907,10 +956,10 @@
}, },
"pillar": { "pillar": {
"Package": "pillar", "Package": "pillar",
"Version": "1.4.7", "Version": "1.9.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "3b3dd89b2ee115a8b54e93a34cd546b4" "Hash": "15da5a8412f317beeee6175fbc76f4bb"
}, },
"pkgbuild": { "pkgbuild": {
"Package": "pkgbuild", "Package": "pkgbuild",
@ -977,10 +1026,10 @@
}, },
"processx": { "processx": {
"Package": "processx", "Package": "processx",
"Version": "3.4.5", "Version": "3.8.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "22aab6098cb14edd0a5973a8438b569b" "Hash": "a33ee2d9bf07564efb888ad98410da84"
}, },
"prodlim": { "prodlim": {
"Package": "prodlim", "Package": "prodlim",
@ -1000,7 +1049,7 @@
"Package": "progressr", "Package": "progressr",
"Version": "0.9.0", "Version": "0.9.0",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "RSPM",
"Hash": "ca0d80ecc29903f7579edbabd91f4199" "Hash": "ca0d80ecc29903f7579edbabd91f4199"
}, },
"promises": { "promises": {
@ -1033,10 +1082,10 @@
}, },
"purrr": { "purrr": {
"Package": "purrr", "Package": "purrr",
"Version": "0.3.4", "Version": "1.0.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "97def703420c8ab10d8f0e6c72101e02" "Hash": "d71c815267c640f17ddbf7f16144b4bb"
}, },
"qap": { "qap": {
"Package": "qap", "Package": "qap",
@ -1052,6 +1101,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "d35964686307333a7121eb41c7dcd4e0" "Hash": "d35964686307333a7121eb41c7dcd4e0"
}, },
"ragg": {
"Package": "ragg",
"Version": "1.2.5",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "690bc058ea2b1b8a407d3cfe3dce3ef9"
},
"rappdirs": { "rappdirs": {
"Package": "rappdirs", "Package": "rappdirs",
"Version": "0.3.3", "Version": "0.3.3",
@ -1061,17 +1117,17 @@
}, },
"readr": { "readr": {
"Package": "readr", "Package": "readr",
"Version": "1.4.0", "Version": "2.1.4",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "2639976851f71f330264a9c9c3d43a61" "Hash": "b5047343b3825f37ad9d3b5d89aa1078"
}, },
"readxl": { "readxl": {
"Package": "readxl", "Package": "readxl",
"Version": "1.3.1", "Version": "1.4.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "63537c483c2dbec8d9e3183b3735254a" "Hash": "2e6020b1399d95f947ed867045e9ca17"
}, },
"recipes": { "recipes": {
"Package": "recipes", "Package": "recipes",
@ -1110,10 +1166,10 @@
}, },
"reprex": { "reprex": {
"Package": "reprex", "Package": "reprex",
"Version": "0.3.0", "Version": "2.0.2",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "b06bfb3504cc8a4579fd5567646f745b" "Hash": "d66fe009d4c20b7ab1927eb405db9ee2"
}, },
"reshape2": { "reshape2": {
"Package": "reshape2", "Package": "reshape2",
@ -1138,10 +1194,10 @@
}, },
"rlang": { "rlang": {
"Package": "rlang", "Package": "rlang",
"Version": "0.4.10", "Version": "1.1.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "599df23c40a4fce9c7b4764f28c37857" "Hash": "dc079ccd156cde8647360f473c1fa718"
}, },
"rmarkdown": { "rmarkdown": {
"Package": "rmarkdown", "Package": "rmarkdown",
@ -1173,24 +1229,24 @@
}, },
"rstudioapi": { "rstudioapi": {
"Package": "rstudioapi", "Package": "rstudioapi",
"Version": "0.13", "Version": "0.14",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "06c85365a03fdaf699966cc1d3cf53ea" "Hash": "690bd2acc42a9166ce34845884459320"
}, },
"rvest": { "rvest": {
"Package": "rvest", "Package": "rvest",
"Version": "0.3.6", "Version": "1.0.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "a9795ccb2d608330e841998b67156764" "Hash": "a4a5ac819a467808c60e36e92ddf195e"
}, },
"scales": { "scales": {
"Package": "scales", "Package": "scales",
"Version": "1.1.1", "Version": "1.2.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "6f76f71042411426ec8df6c54f34e6dd" "Hash": "906cb23d2f1c5680b8ce439b44c6fa63"
}, },
"selectr": { "selectr": {
"Package": "selectr", "Package": "selectr",
@ -1236,17 +1292,17 @@
}, },
"stringi": { "stringi": {
"Package": "stringi", "Package": "stringi",
"Version": "1.5.3", "Version": "1.7.12",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "a063ebea753c92910a4cca7b18bc1f05" "Hash": "ca8bd84263c77310739d2cf64d84d7c9"
}, },
"stringr": { "stringr": {
"Package": "stringr", "Package": "stringr",
"Version": "1.4.0", "Version": "1.5.0",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Hash": "0759e6b6c0957edb1311028a49a35e76" "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8"
}, },
"survival": { "survival": {
"Package": "survival", "Package": "survival",
@ -1262,6 +1318,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "b227d13e29222b4574486cfcbde077fa" "Hash": "b227d13e29222b4574486cfcbde077fa"
}, },
"systemfonts": {
"Package": "systemfonts",
"Version": "1.0.4",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "90b28393209827327de889f49935140a"
},
"testthat": { "testthat": {
"Package": "testthat", "Package": "testthat",
"Version": "3.0.1", "Version": "3.0.1",
@ -1269,12 +1332,19 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "17826764cb92d8b5aae6619896e5a161" "Hash": "17826764cb92d8b5aae6619896e5a161"
}, },
"textshaping": {
"Package": "textshaping",
"Version": "0.3.6",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "1ab6223d3670fac7143202cb6a2d43d5"
},
"tibble": { "tibble": {
"Package": "tibble", "Package": "tibble",
"Version": "3.0.4", "Version": "3.2.1",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "71dffd8544691c520dd8e41ed2d7e070" "Hash": "a84e2cc86d07289b3b6f5069df7a004c"
}, },
"tidygraph": { "tidygraph": {
"Package": "tidygraph", "Package": "tidygraph",
@ -1285,24 +1355,24 @@
}, },
"tidyr": { "tidyr": {
"Package": "tidyr", "Package": "tidyr",
"Version": "1.1.2", "Version": "1.3.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "c40b2d5824d829190f4b825f4496dfae" "Hash": "e47debdc7ce599b070c8e78e8ac0cfcf"
}, },
"tidyselect": { "tidyselect": {
"Package": "tidyselect", "Package": "tidyselect",
"Version": "1.1.0", "Version": "1.2.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "6ea435c354e8448819627cf686f66e0a" "Hash": "79540e5fcd9e0435af547d885f184fd5"
}, },
"tidyverse": { "tidyverse": {
"Package": "tidyverse", "Package": "tidyverse",
"Version": "1.3.0", "Version": "2.0.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "bd51be662f359fa99021f3d51e911490" "Hash": "c328568cd14ea89a83bd4ca7f54ae07e"
}, },
"timeDate": { "timeDate": {
"Package": "timeDate", "Package": "timeDate",
@ -1311,6 +1381,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "fde4fc571f5f61978652c229d4713845" "Hash": "fde4fc571f5f61978652c229d4713845"
}, },
"timechange": {
"Package": "timechange",
"Version": "0.2.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "8548b44f79a35ba1791308b61e6012d7"
},
"tinytex": { "tinytex": {
"Package": "tinytex", "Package": "tinytex",
"Version": "0.28", "Version": "0.28",
@ -1332,6 +1409,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "fc77eb5297507cccfa3349a606061030" "Hash": "fc77eb5297507cccfa3349a606061030"
}, },
"tzdb": {
"Package": "tzdb",
"Version": "0.3.0",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e"
},
"utf8": { "utf8": {
"Package": "utf8", "Package": "utf8",
"Version": "1.1.4", "Version": "1.1.4",
@ -1339,12 +1423,19 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "4a5081acfb7b81a572e4384a7aaf2af1" "Hash": "4a5081acfb7b81a572e4384a7aaf2af1"
}, },
"vctrs": { "uuid": {
"Package": "vctrs", "Package": "uuid",
"Version": "0.3.8", "Version": "1.1-0",
"Source": "Repository", "Source": "Repository",
"Repository": "CRAN", "Repository": "CRAN",
"Hash": "ecf749a1b39ea72bd9b51b76292261f1" "Hash": "f1cb46c157d080b729159d407be83496"
},
"vctrs": {
"Package": "vctrs",
"Version": "0.6.1",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "06eceb3a5d716fd0654cc23ca3d71a99"
}, },
"viridis": { "viridis": {
"Package": "viridis", "Package": "viridis",
@ -1360,6 +1451,13 @@
"Repository": "RSPM", "Repository": "RSPM",
"Hash": "ce4f6271baa94776db692f1cb2055bee" "Hash": "ce4f6271baa94776db692f1cb2055bee"
}, },
"vroom": {
"Package": "vroom",
"Version": "1.6.1",
"Source": "Repository",
"Repository": "CRAN",
"Hash": "7015a74373b83ffaef64023f4a0f5033"
},
"waldo": { "waldo": {
"Package": "waldo", "Package": "waldo",
"Version": "0.2.3", "Version": "0.2.3",
@ -1376,10 +1474,10 @@
}, },
"withr": { "withr": {
"Package": "withr", "Package": "withr",
"Version": "2.3.0", "Version": "2.5.0",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "7307d79f58d1885b38c4f4f1a8cb19dd" "Hash": "c0e49a9760983e81e55cdd9be92e7182"
}, },
"xfun": { "xfun": {
"Package": "xfun", "Package": "xfun",
@ -1390,10 +1488,10 @@
}, },
"xml2": { "xml2": {
"Package": "xml2", "Package": "xml2",
"Version": "1.3.2", "Version": "1.3.3",
"Source": "Repository", "Source": "Repository",
"Repository": "RSPM", "Repository": "CRAN",
"Hash": "d4d71a75dd3ea9eb5fa28cc21f9585e2" "Hash": "40682ed6a969ea5abfd351eb67833adc"
}, },
"xtable": { "xtable": {
"Package": "xtable", "Package": "xtable",

View File

@ -247,6 +247,8 @@ rule empatica_readable_datetime:
include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"] include_past_periodic_segments = config["TIME_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
output: output:
"data/raw/{pid}/empatica_{sensor}_with_datetime.csv" "data/raw/{pid}/empatica_{sensor}_with_datetime.csv"
resources:
mem_mb=50000
script: script:
"../src/data/datetime/readable_datetime.R" "../src/data/datetime/readable_datetime.R"

View File

@ -29,23 +29,16 @@ get_genre <- function(apps){
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F) apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
genre_catalogue <- data.frame() genre_catalogue <- data.frame()
catalogue_source <- snakemake@params[["catalogue_source"]] catalogue_source <- snakemake@params[["catalogue_source"]]
package_names_hashed <- snakemake@params[["package_names_hashed"]]
update_catalogue_file <- snakemake@params[["update_catalogue_file"]] update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]] scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre")))) apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
if (length(package_names_hashed) == 0) {package_names_hashed <- FALSE}
if(nrow(apps) > 0){ if(nrow(apps) > 0){
if(catalogue_source == "GOOGLE"){ if(catalogue_source == "GOOGLE"){
apps_with_genre <- apps %>% mutate(genre = NA_character_) apps_with_genre <- apps %>% mutate(genre = NA_character_)
} else if(catalogue_source == "FILE"){ } else if(catalogue_source == "FILE"){
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character")) genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
if (package_names_hashed) { apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
} else {
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
}
} }
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){ if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){

View File

@ -136,8 +136,9 @@ def patch_ibi_with_bvp(ibi_data, bvp_data):
# Begin with the cr-features part # Begin with the cr-features part
try: try:
ibi_data, ibi_start_timestamp = empatica2d_to_array(ibi_data_file) ibi_data, ibi_start_timestamp = empatica2d_to_array(ibi_data_file)
except IndexError as e: except (IndexError, KeyError) as e:
# Checks whether IBI.csv is empty # Checks whether IBI.csv is empty
# It may raise a KeyError if df is empty here: startTimeStamp = df.time[0]
df_test = pd.read_csv(ibi_data_file, names=['timings', 'inter_beat_interval'], header=None) df_test = pd.read_csv(ibi_data_file, names=['timings', 'inter_beat_interval'], header=None)
if df_test.empty: if df_test.empty:
df_test['timestamp'] = df_test['timings'] df_test['timestamp'] = df_test['timings']

View File

@ -120,7 +120,7 @@ def straw_cleaning(sensor_data_files, provider):
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')]
if provider["COLS_VAR_THRESHOLD"]: if provider["COLS_VAR_THRESHOLD"]:
features.drop(features.std()[features.std() == 0].index.values, axis=1, inplace=True) features.drop(features.std(numeric_only=True)[features.std(numeric_only=True) == 0].index.values, axis=1, inplace=True)
fe5 = features.copy() fe5 = features.copy()
@ -134,7 +134,7 @@ def straw_cleaning(sensor_data_files, provider):
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]] valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
corr_matrix = valid_features.corr().abs() corr_matrix = valid_features.corr().abs()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool)) upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])] to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
features.drop(to_drop, axis=1, inplace=True) features.drop(to_drop, axis=1, inplace=True)
@ -150,12 +150,14 @@ def straw_cleaning(sensor_data_files, provider):
return features return features
def k_nearest(df):
pd.set_option('display.max_columns', None)
imputer = KNNImputer(n_neighbors=3)
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
def impute(df, method='zero'): def impute(df, method='zero'):
def k_nearest(df):
pd.set_option('display.max_columns', None)
imputer = KNNImputer(n_neighbors=3)
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
return { return {
'zero': df.fillna(0), 'zero': df.fillna(0),
@ -165,6 +167,7 @@ def impute(df, method='zero'):
'knn': k_nearest(df) 'knn': k_nearest(df)
}[method] }[method]
def graph_bf_af(features, phase_name, plt_flag=False): def graph_bf_af(features, phase_name, plt_flag=False):
if plt_flag: if plt_flag:
sns.set(rc={"figure.figsize":(16, 8)}) sns.set(rc={"figure.figsize":(16, 8)})

View File

@ -146,7 +146,7 @@ def straw_cleaning(sensor_data_files, provider, target):
# (5) REMOVE COLS WHERE VARIANCE IS 0 # (5) REMOVE COLS WHERE VARIANCE IS 0
if provider["COLS_VAR_THRESHOLD"]: if provider["COLS_VAR_THRESHOLD"]:
features.drop(features.std()[features.std() == 0].index.values, axis=1, inplace=True) features.drop(features.std(numeric_only=True)[features.std(numeric_only=True) == 0].index.values, axis=1, inplace=True)
graph_bf_af(features, "6variance_drop") graph_bf_af(features, "6variance_drop")
@ -200,7 +200,7 @@ def straw_cleaning(sensor_data_files, provider, target):
valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]] valid_features = features[numerical_cols].loc[:, features[numerical_cols].isna().sum() < drop_corr_features['MIN_OVERLAP_FOR_CORR_THRESHOLD'] * features[numerical_cols].shape[0]]
corr_matrix = valid_features.corr().abs() corr_matrix = valid_features.corr().abs()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool)) upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])] to_drop = [column for column in upper.columns if any(upper[column] > drop_corr_features["CORR_THRESHOLD"])]
# sns.heatmap(corr_matrix, cmap="YlGnBu") # sns.heatmap(corr_matrix, cmap="YlGnBu")
@ -245,11 +245,13 @@ def straw_cleaning(sensor_data_files, provider, target):
return features return features
def k_nearest(df):
imputer = KNNImputer(n_neighbors=3)
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
def impute(df, method='zero'): def impute(df, method='zero'):
def k_nearest(df):
imputer = KNNImputer(n_neighbors=3)
return pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
return { return {
'zero': df.fillna(0), 'zero': df.fillna(0),
@ -259,6 +261,7 @@ def impute(df, method='zero'):
'knn': k_nearest(df) 'knn': k_nearest(df)
}[method] }[method]
def graph_bf_af(features, phase_name, plt_flag=False): def graph_bf_af(features, phase_name, plt_flag=False):
if plt_flag: if plt_flag:
sns.set(rc={"figure.figsize":(16, 8)}) sns.set(rc={"figure.figsize":(16, 8)})

View File

@ -15,13 +15,13 @@ def extract_second_order_features(intraday_features, so_features_names, prefix="
so_features = pd.DataFrame() so_features = pd.DataFrame()
#print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest()) #print(intraday_features.drop("level_1", axis=1).groupby(["local_segment"]).nsmallest())
if "mean" in so_features_names: if "mean" in so_features_names:
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean().add_suffix("_SO_mean")], axis=1) so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean(numeric_only=True).add_suffix("_SO_mean")], axis=1)
if "median" in so_features_names: if "median" in so_features_names:
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median().add_suffix("_SO_median")], axis=1) so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).median(numeric_only=True).add_suffix("_SO_median")], axis=1)
if "sd" in so_features_names: if "sd" in so_features_names:
so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std().fillna(0).add_suffix("_SO_sd")], axis=1) so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std(numeric_only=True).fillna(0).add_suffix("_SO_sd")], axis=1)
if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution? if "nlargest" in so_features_names: # largest 5 -- maybe there is a faster groupby solution?
for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]: for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:

View File

@ -26,7 +26,7 @@ def calculate_empatica_data_yield(features): # TODO
# Assigns 1 to values that are over 1 (in case of windows not being filled fully) # Assigns 1 to values that are over 1 (in case of windows not being filled fully)
features[empatica_data_yield_cols] = features[empatica_data_yield_cols].apply(lambda x: [y if y <= 1 or np.isnan(y) else 1 for y in x]) features[empatica_data_yield_cols] = features[empatica_data_yield_cols].apply(lambda x: [y if y <= 1 or np.isnan(y) else 1 for y in x])
features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1).fillna(0) features["empatica_data_yield"] = features[empatica_data_yield_cols].mean(axis=1, numeric_only=True).fillna(0)
features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average) features.drop(empatica_data_yield_cols, axis=1, inplace=True) # In case of if the advanced operations will later not be needed (e.g., weighted average)
return features return features

View File

@ -140,8 +140,8 @@ def extract_ers(esm_df):
# Extracted 3 targets that will be transfered in the csv file to the cleaning script. # Extracted 3 targets that will be transfered in the csv file to the cleaning script.
se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'}) se_stressfulness_event_tg = esm_df[esm_df.questionnaire_id == 87.].set_index(['device_id', 'esm_session'])['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_stressfulness_event'})
se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'}) se_threat_tg = esm_df[esm_df.questionnaire_id == 88.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_threat'})
se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean()['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'}) se_challenge_tg = esm_df[esm_df.questionnaire_id == 89.].groupby(["device_id", "esm_session"]).mean(numeric_only=True)['esm_user_answer_numeric'].to_frame().rename(columns={'esm_user_answer_numeric': 'appraisal_challenge'})
# All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count) # All relevant features are joined by inner join to remove standalone columns (e.g., stressfulness event target has larger count)
extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \ extracted_ers = extracted_ers.join(session_start_timestamp, on=['device_id', 'esm_session'], how='inner') \

View File

@ -115,7 +115,7 @@ cluster_on = provider["CLUSTER_ON"]
strategy = provider["INFER_HOME_LOCATION_STRATEGY"] strategy = provider["INFER_HOME_LOCATION_STRATEGY"]
days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"] days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
if not location_data.timestamp.is_monotonic: if not location_data.timestamp.is_monotonic_increasing:
location_data.sort_values(by=["timestamp"], inplace=True) location_data.sort_values(by=["timestamp"], inplace=True)
location_data["duration_in_seconds"] = -1 * location_data.timestamp.diff(-1) / 1000 location_data["duration_in_seconds"] = -1 * location_data.timestamp.diff(-1) / 1000