diff --git a/.gitignore b/.gitignore
index f2a2a4c6..eb1b7009 100644
--- a/.gitignore
+++ b/.gitignore
@@ -93,6 +93,7 @@ packrat/*
 # exclude data from source control by default
 data/external/*
+!/data/external/empatica/empatica1/E4 Data.zip
 !/data/external/.gitkeep
 !/data/external/stachl_application_genre_catalogue.csv
 !/data/external/timesegments*.csv
@@ -113,4 +114,10 @@ sn_profile_*/
 settings.dcf
 tests/fakedata_generation/
 site/
-credentials.yaml
+!credentials.yaml
+
+# Docker container and other files
+.devcontainer
+
+# Calculating features module
+calculatingfeatures/
diff --git a/README.md b/README.md
index 8c33f4c6..9524b94f 100644
--- a/README.md
+++ b/README.md
@@ -11,3 +11,22 @@
 For more information refer to our [documentation](http://www.rapids.science)
 
 By [MoSHI](https://www.moshi.pitt.edu/), [University of Pittsburgh](https://www.pitt.edu/)
+
+## Installation
+
+For RAPIDS installation refer to the [documentation](https://www.rapids.science/1.8/setup/installation/)
+
+## CalculatingFeatures
+
+This RAPIDS extension uses the CalculatingFeatures library, accessible [here](https://repo.ijs.si/matjazbostic/calculatingfeatures).
+
+To use the CalculatingFeatures library:
+- Follow the installation instructions in the [README.md](https://repo.ijs.si/matjazbostic/calculatingfeatures/-/blob/master/README.md).
+
+- Copy the built calculatingfeatures folder into the RAPIDS workspace.
+
+- Install the CalculatingFeatures package by running:
+  ```
+  pip install "path/to/the/calculatingfeatures/folder"
+  ```
+  The CalculatingFeatures package has to be rebuilt and reinstalled every time to get the newest version.
\ No newline at end of file
diff --git a/Snakefile b/Snakefile
index 94b6555b..4241aa19 100644
--- a/Snakefile
+++ b/Snakefile
@@ -5,7 +5,6 @@ include: "rules/common.smk"
 include: "rules/renv.smk"
 include: "rules/preprocessing.smk"
 include: "rules/features.smk"
-include: "rules/models.smk"
 include: "rules/reports.smk"
 
 import itertools
@@ -328,6 +327,8 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
+        if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
+            files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
 
 for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
     if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:
@@ -347,6 +348,8 @@ for provider in config["EMPATICA_TEMPERATURE"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/{pid}/empatica_temperature.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
+        if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
+            files_to_compute.extend(expand("data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
 
 for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
     if config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"][provider]["COMPUTE"]:
@@ -356,6 +359,8 @@ for provider in config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/{pid}/empatica_electrodermal_activity.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
+        if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
+            files_to_compute.extend(expand("data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
 
 for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
     if config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"][provider]["COMPUTE"]:
@@ -365,6 +370,9 @@ for provider in config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/{pid}/empatica_blood_volume_pulse.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
+        if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
+            files_to_compute.extend(expand("data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
+
 
 for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
     if config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"][provider]["COMPUTE"]:
@@ -374,6 +382,8 @@ for provider in config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"].keys():
         files_to_compute.extend(expand("data/processed/features/{pid}/empatica_inter_beat_interval.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
+        if provider in config["STANDARDIZATION"]["PROVIDERS"] and config["STANDARDIZATION"]["PROVIDERS"][provider]["COMPUTE"]:
+            files_to_compute.extend(expand("data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_{language}_{provider_key}_windows.csv", pid=config["PIDS"], language=get_script_language(config["STANDARDIZATION"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
 
 if isinstance(config["EMPATICA_TAGS"]["PROVIDERS"], dict):
     for provider in config["EMPATICA_TAGS"]["PROVIDERS"].keys():
@@ -426,6 +436,9 @@ if config["PARAMS_FOR_ANALYSIS"]["TARGET"]["COMPUTE"]:
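+# Each Empatica sensor loop above registers an extra standardized-features target,
+# z_<sensor>_{language}_{provider_key}_windows.csv, whenever that sensor's provider is also
+# listed under config["STANDARDIZATION"]["PROVIDERS"] with COMPUTE set to True.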
     files_to_compute.extend(expand("data/processed/models/population_model/input.csv"))
     #files_to_compute.extend(expand("data/processed/models/individual_model/{pid}/output_{cv_method}/baselines.csv", pid=config["PIDS"], cv_method=config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"]))
 
+# TODO: once all STANDARDIZATION providers have COMPUTE == True, loop over them here and merge
+# every sensor that is set to True into z_all_sensors, both per participant and for all participants.
+# Follow the existing pattern: each sensor block above appends "data/processed/features/all_participants/all_sensor_features.csv".
 
 rule all:
     input:
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/config.yaml b/config.yaml
index f9fc26c9..e3498510 100644
--- a/config.yaml
+++ b/config.yaml
@@ -3,36 +3,34 @@
 ########################################################################################################################
 # See https://www.rapids.science/latest/setup/configuration/#participant-files
-PIDS: ['p031', 'p032', 'p033', 'p034', 'p035', 'p036', 'p037', 'p038', 'p039', 'p040', 'p042', 'p043', 'p044', 'p045', 'p046', 'p049', 'p050', 'p052', 'p053', 'p054', 'p055', 'p057', 'p058', 'p059', 'p060', 'p061', 'p062', 'p064', 'p067', 'p068', 'p069', 'p070', 'p071', 'p072', 'p073', 'p074', 'p075', 'p076', 'p077', 'p078', 'p079', 'p080', 'p081', 'p082', 'p083', 'p084', 'p085', 'p086', 'p088', 'p089', 'p090', 'p091', 'p092', 'p093', 'p106', 'p107']
+PIDS: [p031] # p01, p02, p03
 
 # See https://www.rapids.science/latest/setup/configuration/#automatic-creation-of-participant-files
 CREATE_PARTICIPANT_FILES:
-  USERNAMES_CSV: "data/external/main_study_usernames.csv"
-  CSV_FILE_PATH: "data/external/main_study_participants.csv" # see docs for required format
+  CSV_FILE_PATH: "data/external/example_participants.csv" # see docs for required format
   PHONE_SECTION:
     ADD: True
     IGNORED_DEVICE_IDS: []
   FITBIT_SECTION:
-    ADD: False
+    ADD: True
     IGNORED_DEVICE_IDS: []
   EMPATICA_SECTION:
-    ADD: False
+    ADD: True
     IGNORED_DEVICE_IDS: []
 
 # See https://www.rapids.science/latest/setup/configuration/#time-segments
 TIME_SEGMENTS: &time_segments
   TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
-  FILE: "data/external/timesegments_daily.csv"
-  INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
+  FILE: "data/external/timesegments_periodic.csv"
+  INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
 
 # See https://www.rapids.science/latest/setup/configuration/#timezone-of-your-study
 TIMEZONE:
-  TYPE: MULTIPLE
+  TYPE: SINGLE
   SINGLE:
     TZCODE: Europe/Ljubljana
   MULTIPLE:
-    TZ_FILE: data/external/timezone.csv
-    TZCODES_FILE: data/external/multiple_timezones.csv
+    TZCODES_FILE: data/external/multiple_timezones_example.csv
     IF_MISSING_TZCODE: USE_DEFAULT
     DEFAULT_TZCODE: Europe/Ljubljana
     FITBIT:
@@ -87,7 +85,7 @@ PHONE_ACTIVITY_RECOGNITION:
   EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
   PROVIDERS:
     RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
       FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
       ACTIVITY_CLASSES:
         STATIONARY: ["still", "tilting"]
@@ -116,7 +114,7 @@ PHONE_APPLICATIONS_FOREGROUND:
   SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE.
If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False INCLUDE_EPISODE_FEATURES: True SINGLE_CATEGORIES: ["all", "email"] MULTIPLE_CATEGORIES: @@ -151,7 +149,7 @@ PHONE_BATTERY: EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] SRC_SCRIPT: src/features/phone_battery/rapids/main.py @@ -160,12 +158,12 @@ PHONE_BLUETOOTH: CONTAINER: bluetooth PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"] SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R DORYAB: - COMPUTE: True + COMPUTE: False FEATURES: ALL: DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"] @@ -186,7 +184,7 @@ PHONE_CALLS: CONTAINER: call PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES_TYPE: EPISODES # EVENTS or EPISODES CALL_TYPES: [missed, incoming, outgoing] FEATURES: @@ -229,7 +227,7 @@ PHONE_DATA_YIELD: PHONE_WIFI_VISIBLE] PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid. SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R @@ -257,7 +255,7 @@ PHONE_LIGHT: CONTAINER: light_sensor PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] SRC_SCRIPT: src/features/phone_light/rapids/main.py @@ -271,7 +269,7 @@ PHONE_LOCATIONS: PROVIDERS: DORYAB: - COMPUTE: True + COMPUTE: False FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"] DBSCAN_EPS: 100 # meters DBSCAN_MINSAMPLES: 5 @@ -286,7 +284,7 @@ PHONE_LOCATIONS: SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: - COMPUTE: True + COMPUTE: False FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates gruped by minute) were used to compute features @@ -304,7 +302,7 @@ PHONE_MESSAGES: CONTAINER: sms PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False MESSAGES_TYPES : [received, sent] FEATURES: received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact] @@ -316,7 +314,7 @@ PHONE_SCREEN: CONTAINER: screen PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False REFERENCE_HOUR_FIRST_USE: 0 IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable @@ -338,12 +336,13 @@ PHONE_WIFI_VISIBLE: CONTAINER: wifi PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False FEATURES: ["countscans", 
"uniquedevices", "countscansmostuniquedevice"] SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R + ######################################################################################################################## # FITBIT # ######################################################################################################################## @@ -506,6 +505,16 @@ EMPATICA_ACCELEROMETER: COMPUTE: False FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py + CR: + COMPUTE: False + FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features + WINDOWS: + COMPUTE: True + WINDOW_LENGTH: 15 # specify window length in seconds + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows'] + STANDARDIZE_FEATURES: True + SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py + # See https://www.rapids.science/latest/features/empatica-heartrate/ EMPATICA_HEARTRATE: @@ -524,6 +533,16 @@ EMPATICA_TEMPERATURE: COMPUTE: False FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"] SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py + CR: + COMPUTE: False + FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean", + "stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"] + WINDOWS: + COMPUTE: True + WINDOW_LENGTH: 300 # specify window length in seconds + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows'] + STANDARDIZE_FEATURES: True + SRC_SCRIPT: src/features/empatica_temperature/cr/main.py # See https://www.rapids.science/latest/features/empatica-electrodermal-activity/ EMPATICA_ELECTRODERMAL_ACTIVITY: @@ -533,6 +552,20 @@ EMPATICA_ELECTRODERMAL_ACTIVITY: COMPUTE: False FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"] SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py + CR: + COMPUTE: True + FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic', + 'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore', + 'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio', + 'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', + 'significantDecrease'] + WINDOWS: + COMPUTE: True + WINDOW_LENGTH: 60 # specify window length in seconds + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', count_windows, eda_num_peaks_non_zero] + STANDARDIZE_FEATURES: True + IMPUTE_NANS: True + SRC_SCRIPT: src/features/empatica_electrodermal_activity/cr/main.py # See https://www.rapids.science/latest/features/empatica-blood-volume-pulse/ EMPATICA_BLOOD_VOLUME_PULSE: @@ -542,6 +575,16 @@ EMPATICA_BLOOD_VOLUME_PULSE: COMPUTE: False FEATURES: ["maxbvp", "minbvp", "avgbvp", "medianbvp", "modebvp", "stdbvp", "diffmaxmodebvp", "diffminmodebvp", "entropybvp"] SRC_SCRIPT: src/features/empatica_blood_volume_pulse/dbdp/main.py + CR: + COMPUTE: False + FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 
'sd2', 'sd1/sd2', 'numRR', # Time features + 'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features + WINDOWS: + COMPUTE: True + WINDOW_LENGTH: 300 # specify window length in seconds + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows', 'hrv_num_windows_non_nan'] + STANDARDIZE_FEATURES: True + SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py # See https://www.rapids.science/latest/features/empatica-inter-beat-interval/ EMPATICA_INTER_BEAT_INTERVAL: @@ -551,6 +594,17 @@ EMPATICA_INTER_BEAT_INTERVAL: COMPUTE: False FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"] SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py + CR: + COMPUTE: False + FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features + 'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features + PATCH_WITH_BVP: True + WINDOWS: + COMPUTE: True + WINDOW_LENGTH: 300 # specify window length in seconds + SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows', 'hrv_num_windows_non_nan'] + STANDARDIZE_FEATURES: True + SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py # See https://www.rapids.science/latest/features/empatica-tags/ EMPATICA_TAGS: @@ -558,6 +612,7 @@ EMPATICA_TAGS: PROVIDERS: # None implemented yet + ######################################################################################################################## # PLOTS # ######################################################################################################################## @@ -566,7 +621,7 @@ EMPATICA_TAGS: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#1-histograms-of-phone-data-yield HISTOGRAM_PHONE_DATA_YIELD: - PLOT: True + PLOT: False # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT: @@ -575,7 +630,7 @@ HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT: # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: - PLOT: True + PLOT: False # See https://www.rapids.science/latest/visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT: @@ -586,7 +641,7 @@ HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT: # See https://www.rapids.science/latest/visualizations/feature-visualizations/#1-heatmap-correlation-matrix HEATMAP_FEATURE_CORRELATION_MATRIX: - PLOT: True + PLOT: False MIN_ROWS_RATIO: 0.5 CORR_THRESHOLD: 0.1 CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"} @@ -599,17 +654,17 @@ HEATMAP_FEATURE_CORRELATION_MATRIX: ALL_CLEANING_INDIVIDUAL: PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False IMPUTE_SELECTED_EVENT_FEATURES: COMPUTE: True MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33 COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable COLS_VAR_THRESHOLD: True - ROWS_NAN_THRESHOLD: 1 # set to 1 to disable + ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES - DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable + DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable DROP_HIGHLY_CORRELATED_FEATURES: - COMPUTE: False + 
COMPUTE: True MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5 CORR_THRESHOLD: 0.95 SRC_SCRIPT: src/features/all_cleaning_individual/rapids/main.R @@ -617,23 +672,33 @@ ALL_CLEANING_INDIVIDUAL: ALL_CLEANING_OVERALL: PROVIDERS: RAPIDS: - COMPUTE: True + COMPUTE: False IMPUTE_SELECTED_EVENT_FEATURES: COMPUTE: True MIN_DATA_YIELDED_MINUTES_TO_IMPUTE: 0.33 COLS_NAN_THRESHOLD: 0.3 # set to 1 to disable COLS_VAR_THRESHOLD: True - ROWS_NAN_THRESHOLD: 1 # set to 1 to disable + ROWS_NAN_THRESHOLD: 0.3 # set to 1 to disable DATA_YIELD_FEATURE: RATIO_VALID_YIELDED_HOURS # RATIO_VALID_YIELDED_HOURS or RATIO_VALID_YIELDED_MINUTES - DATA_YIELD_RATIO_THRESHOLD: 0.3 # set to 0 to disable + DATA_YIELD_RATIO_THRESHOLD: 0.5 # set to 0 to disable DROP_HIGHLY_CORRELATED_FEATURES: - COMPUTE: False + COMPUTE: True MIN_OVERLAP_FOR_CORR_THRESHOLD: 0.5 CORR_THRESHOLD: 0.95 SRC_SCRIPT: src/features/all_cleaning_overall/rapids/main.R + ######################################################################################################################## -# Analysis Workflow Example # +# Z-score standardization # +######################################################################################################################## +STANDARDIZATION: + PROVIDERS: + CR: + COMPUTE: True + SRC_SCRIPT: src/features/standardization/main.py + +######################################################################################################################## +# Baseline # ######################################################################################################################## PARAMS_FOR_ANALYSIS: @@ -651,3 +716,4 @@ PARAMS_FOR_ANALYSIS: TARGET: COMPUTE: True LABEL: PANAS_negative_affect_mean + diff --git a/credentials.yaml b/credentials.yaml new file mode 100644 index 00000000..3a1c51e0 --- /dev/null +++ b/credentials.yaml @@ -0,0 +1,6 @@ +PSQL_STRAW: + database: staw + user: staw_db + password: kizi-x2yf-mate + host: 212.235.208.113 + port: 5432 diff --git a/data/external/aware_csv/calls.csv b/data/external/aware_csv/calls.csv new file mode 100644 index 00000000..530f275a --- /dev/null +++ b/data/external/aware_csv/calls.csv @@ -0,0 +1,9 @@ +"_id","timestamp","device_id","call_type","call_duration","trace" +1,1587663260695,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",2,14,"d5e84f8af01b2728021d4f43f53a163c0c90000c" +2,1587739118007,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",3,0,"47c125dc7bd163b8612cdea13724a814917b6e93" +5,1587746544891,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",2,95,"9cc793ffd6e88b1d850ce540b5d7e000ef5650d4" +6,1587911379859,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",2,63,"51fb9344e988049a3fec774c7ca622358bf80264" +7,1587992647361,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",3,0,"2a862a7730cfdfaf103a9487afe3e02935fd6e02" +8,1588020039448,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",1,11,"a2c53f6a086d98622c06107780980cf1bb4e37bd" +11,1588176189024,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",2,65,"56589df8c830c70e330b644921ed38e08d8fd1f3" +12,1588197745079,"a748ee1a-1d0b-4ae9-9074-279a2b6ba524",3,0,"cab458018a8ed3b626515e794c70b6f415318adc" diff --git a/data/external/empatica/empatica1/E4 Data.zip b/data/external/empatica/empatica1/E4 Data.zip new file mode 100644 index 00000000..aa26b26f Binary files /dev/null and b/data/external/empatica/empatica1/E4 Data.zip differ diff --git a/data/external/participant_files/p01.yaml b/data/external/participant_files/p01.yaml new file mode 100644 index 00000000..fe394a76 --- /dev/null +++ b/data/external/participant_files/p01.yaml @@ -0,0 +1,11 @@ +PHONE: + DEVICE_IDS: 
[a748ee1a-1d0b-4ae9-9074-279a2b6ba524] # the participant's AWARE device id + PLATFORMS: [android] # or ios + LABEL: MyTestP01 # any string + START_DATE: 2020-01-01 # this can also be empty + END_DATE: 2021-01-01 # this can also be empty +EMPATICA: + DEVICE_IDS: [empatica1] + LABEL: test01 + START_DATE: + END_DATE: diff --git a/data/external/timesegments_frequency.csv b/data/external/timesegments_frequency.csv index e56782d1..22684a47 100644 --- a/data/external/timesegments_frequency.csv +++ b/data/external/timesegments_frequency.csv @@ -1,2 +1,2 @@ label,length -thirtyminutes,30 \ No newline at end of file +fiveminutes,5 \ No newline at end of file diff --git a/data/external/timesegments_periodic.csv b/data/external/timesegments_periodic.csv index d52d92fa..117313e8 100644 --- a/data/external/timesegments_periodic.csv +++ b/data/external/timesegments_periodic.csv @@ -1,9 +1,2 @@ label,start_time,length,repeats_on,repeats_value -threeday,00:00:00,2D 23H 59M 59S,every_day,0 daily,00:00:00,23H 59M 59S,every_day,0 -morning,06:00:00,5H 59M 59S,every_day,0 -afternoon,12:00:00,5H 59M 59S,every_day,0 -evening,18:00:00,5H 59M 59S,every_day,0 -night,00:00:00,5H 59M 59S,every_day,0 -two_weeks_overlapping,00:00:00,13D 23H 59M 59S,every_day,0 -weekends,00:00:00,2D 23H 59M 59S,wday,5 diff --git a/environment.yml b/environment.yml index 98fe3c80..9a237a33 100644 --- a/environment.yml +++ b/environment.yml @@ -3,114 +3,138 @@ channels: - conda-forge - defaults dependencies: + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 - _py-xgboost-mutex=2.0 - - appdirs=1.4.* + - appdirs=1.4.4 - arrow=0.16.0 - - asn1crypto=1.4.* - - astropy=4.2.* - - attrs=20.3.* - - binaryornot=0.4.* + - asn1crypto=1.4.0 + - astropy=4.2.1 + - attrs=20.3.0 + - binaryornot=0.4.4 - blas=1.0 - - brotlipy=0.7.* - - bzip2=1.0.* - - ca-certificates - - certifi + - brotlipy=0.7.0 + - bzip2=1.0.8 + - ca-certificates=2021.7.5 + - certifi=2021.5.30 - cffi=1.14.4 - - chardet=3.0.* - - click=7.1.* - - cookiecutter=1.6.* - - cryptography=3.3.* - - datrie=0.8.* + - chardet=3.0.4 + - click=7.1.2 + - colorama=0.4.4 + - cookiecutter=1.6.0 + - cryptography=3.3.1 + - datrie=0.8.2 - docutils=0.16 - future=0.18.2 - - gitdb=4.0.* - - gitdb2=4.0.* - - gitpython=3.1.* + - gitdb=4.0.5 + - gitdb2=4.0.2 + - gitpython=3.1.11 - idna=2.10 - - imbalanced-learn=0.6.* - - importlib-metadata=2.0.* - - importlib_metadata=2.0.* + - imbalanced-learn=0.6.2 + - importlib-metadata=2.0.0 + - importlib_metadata=2.0.0 - intel-openmp=2019.4 - jinja2=2.11.2 - - jinja2-time=0.2.* - - joblib=1.0.* - - jsonschema=3.2.* - - libblas=3.8.* - - libcblas=3.8.* - - libcxx=10.0.* - - libedit=3.1.* + - jinja2-time=0.2.0 + - joblib=1.0.0 + - jsonschema=3.2.0 + - ld_impl_linux-64=2.36.1 + - libblas=3.8.0 + - libcblas=3.8.0 + - libcxx=10.0.0 + - libcxxabi=10.0.0 + - libedit=3.1.20191231 - libffi=3.3 + - libgcc-ng=11.2.0 - libgfortran - - liblapack=3.8.* - - libopenblas=0.3.* + - libgfortran + - libgfortran + - liblapack=3.8.0 + - libopenblas=0.3.10 + - libstdcxx-ng=11.2.0 - libxgboost=0.90 - - lightgbm=3.1.* - - llvm-openmp=10.0.* - - markupsafe=1.1.* + - libzlib=1.2.11 + - lightgbm=3.1.1 + - llvm-openmp=10.0.0 + - markupsafe=1.1.1 - mkl - - mkl-service=2.3.* - - mkl_fft=1.2.* - - mkl_random=1.1.* - - more-itertools=8.6.* + - mkl-service=2.3.0 + - mkl_fft=1.2.0 + - mkl_random=1.1.1 + - more-itertools=8.6.0 - ncurses=6.2 - numpy=1.19.2 - numpy-base=1.19.2 - - openblas=0.3.* - - openssl - - pandas=1.1.* - - pbr=5.5.* - - pip=20.3.* + - openblas=0.3.4 + - openssl=1.1.1k + - pandas=1.1.5 + - pbr=5.5.1 
+ - pip=20.3.3 - plotly=4.14.1 - - poyo=0.5.* - - psutil=5.7.* - - psycopg2 + - poyo=0.5.0 + - psutil=5.7.2 - py-xgboost=0.90 - pycparser=2.20 - - pyerfa=1.7.* - - pyopenssl=20.0.* - - pyprojroot - - pysocks=1.7.* - - python=3.7.* - - python-dateutil=2.8.* - - python-dotenv + - pyerfa=1.7.1.1 + - pyopenssl=20.0.1 + - pysocks=1.7.1 + - python=3.7.9 + - python-dateutil=2.8.1 - python_abi=3.7 - pytz=2020.4 - - pyyaml=5.3.* + - pyyaml=5.3.1 - readline=8.0 - requests=2.25.0 - - retrying=1.3.* + - retrying=1.3.3 - scikit-learn=0.23.2 - - scipy=1.5.* - - setuptools=51.0.* + - scipy=1.5.2 + - setuptools=51.0.0 - six=1.15.0 - - smmap=3.0.* - - smmap2=3.0.* - - sqlalchemy + - smmap=3.0.4 + - smmap2=3.0.1 - sqlite=3.33.0 - - threadpoolctl=2.1.* - - tk=8.6.* + - threadpoolctl=2.1.0 + - tk=8.6.10 - tqdm=4.62.0 - urllib3=1.25.11 - wheel=0.36.2 - - whichcraft=0.6.* + - whichcraft=0.6.1 - wrapt=1.12.1 - xgboost=0.90 - - xz=5.2.* - - yaml=0.2.* - - zipp=3.4.* - - zlib=1.2.* + - xz=5.2.5 + - yaml=0.2.5 + - zipp=3.4.0 + - zlib=1.2.11 - pip: - - amply==0.1.* + - amply==0.1.4 + - bidict==0.22.0 + - biosppy==0.8.0 + - cached-property==1.5.2 - configargparse==0.15.1 - - decorator==4.4.* - - ipython-genutils==0.2.* - - jupyter-core==4.6.* - - nbformat==5.0.* + - cr-features==0.1.15 + - cycler==0.11.0 + - decorator==4.4.2 + - fonttools==4.33.2 + - h5py==3.6.0 + - hmmlearn==0.2.7 + - ipython-genutils==0.2.0 + - jupyter-core==4.6.3 + - kiwisolver==1.4.2 + - matplotlib==3.5.1 + - nbformat==5.0.7 + - opencv-python==4.5.5.64 + - packaging==21.3 + - peakutils==1.3.3 + - pillow==9.1.0 - pulp==2.4 - - pyparsing==2.4.* + - pyparsing==2.4.7 - pyrsistent==0.15.5 - - ratelimiter==1.2.* + - pywavelets==1.3.0 + - ratelimiter==1.2.0.post0 + - seaborn==0.11.2 + - shortuuid==1.0.8 - snakemake==5.30.2 - toposort==1.5 - - traitlets==4.3.* -prefix: /usr/local/Caskroom/miniconda/base/envs/rapids202108 + - traitlets==4.3.3 + - typing-extensions==4.2.0 +prefix: /opt/conda/envs/rapids diff --git a/renv.lock b/renv.lock index 0920a32c..8db3499c 100644 --- a/renv.lock +++ b/renv.lock @@ -13,168 +13,168 @@ "Package": "BDgraph", "Version": "2.63", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "81946367dff8885e6d87e4e12ffc5f92" }, "BH": { "Package": "BH", "Version": "1.72.0-3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "8f9ce74c6417d61f0782cbae5fd2b7b0" }, "DBI": { "Package": "DBI", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4744be45519d675af66c28478720fce5" }, "DEoptimR": { "Package": "DEoptimR", "Version": "1.0-8", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4fdd17df6c1b897cb59bdd9cf6621a43" }, "Formula": { "Package": "Formula", "Version": "1.2-4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "cc8c8c4d61346cde1ca60030ff9c241f" }, "Hmisc": { "Package": "Hmisc", "Version": "4.4-2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "66458e906b2112a8b1639964efd77d7c" }, "KernSmooth": { "Package": "KernSmooth", "Version": "2.23-18", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9e703ad8bf0e99f3691f05da32dfe68b" }, "MASS": { "Package": "MASS", "Version": "7.3-53", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d1bc1c8e9c0ace57ec9ffea01021d45f" }, "Matrix": { "Package": "Matrix", "Version": "1.2-18", "Source": "Repository", - "Repository": "CRAN", + 
"Repository": "RSPM", "Hash": "08588806cba69f04797dab50627428ed" }, "ModelMetrics": { "Package": "ModelMetrics", "Version": "1.2.2.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "40a55bd0b44719941d103291ac5e9d74" }, "R6": { "Package": "R6", "Version": "2.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b203113193e70978a696b2809525649d" }, "RColorBrewer": { "Package": "RColorBrewer", "Version": "1.1-2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "e031418365a7f7a766181ab5a41a5716" }, "RMariaDB": { "Package": "RMariaDB", "Version": "1.0.11", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0e4f634f2bb38e325717d384a877ccc0" }, "RPostgres": { "Package": "RPostgres", - "Version": "1.4.1", + "Version": "1.4.4", "Source": "Repository", "Repository": "CRAN", - "Hash": "dcc7c6bfb144ff8723f7d525ca4a39f0" + "Hash": "c593ecb8dbca9faf3906431be610ca28" }, "Rcpp": { "Package": "Rcpp", "Version": "1.0.7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "dab19adae4440ae55aa8a9d238b246bb" }, "RcppArmadillo": { "Package": "RcppArmadillo", "Version": "0.10.1.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1a885965adade4b8629c478a3b3c2cf7" }, "RcppEigen": { "Package": "RcppEigen", "Version": "0.3.3.9.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ddfa72a87fdf4c80466a20818be91d00" }, "SQUAREM": { "Package": "SQUAREM", "Version": "2021.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0cf10dab0d023d5b46a5a14387556891" }, "TSP": { "Package": "TSP", "Version": "1.1-11", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b72b16750535acbc63e1337d38ff1ace" }, "abind": { "Package": "abind", "Version": "1.4-5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4f57884290cc75ab22f4af9e9d4ca862" }, "acepack": { "Package": "acepack", "Version": "1.4.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9d5038b94a50c7bf26940e2ef7f340c8" }, "askpass": { "Package": "askpass", "Version": "1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "e8a22846fff485f0be3770c2da758713" }, "assertthat": { "Package": "assertthat", "Version": "0.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "50c838a310445e954bc13f26f26a6ecf" }, "backports": { "Package": "backports", "Version": "1.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "644043219fc24e190c2f620c1a380a69" }, "base64enc": { @@ -188,98 +188,98 @@ "Package": "bit", "Version": "4.0.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "f36715f14d94678eea9933af927bc15d" }, "bit64": { "Package": "bit64", "Version": "4.0.5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9fe98599ca456d6552421db0d6772d8f" }, "blob": { "Package": "blob", "Version": "1.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9addc7e2c5954eca5719928131fed98c" }, "boot": { "Package": "boot", "Version": "1.3-25", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bd51734a754b6c2baf28b2d1ebc11e91" }, "brio": { "Package": "brio", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": 
"RSPM", "Hash": "570a24963009b9cce0869a0463c83580" }, "broom": { "Package": "broom", "Version": "0.7.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "5581a5ddc8fe2ac5e0d092ec2de4c4ae" }, "callr": { "Package": "callr", "Version": "3.5.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b7d7f1e926dfcd57c74ce93f5c048e80" }, "caret": { "Package": "caret", "Version": "6.0-89", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "95cdd7da1e51ab0451c27666f15db891" }, "cellranger": { "Package": "cellranger", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "f61dbaec772ccd2e17705c1e872e9e7c" }, "checkmate": { "Package": "checkmate", "Version": "2.0.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a667800d5f0350371bedeb8b8b950289" }, "class": { "Package": "class", "Version": "7.3-17", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9267f5dab59a4ef44229858a142bded1" }, "cli": { "Package": "cli", "Version": "2.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "3ef298932294b775fa0a3eeaa3a645b0" }, "clipr": { "Package": "clipr", "Version": "0.7.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ebaa97ac99cc2daf04e77eecc7b781d7" }, "cluster": { "Package": "cluster", "Version": "2.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "db63a44aab5aadcb6bf2f129751d129a" }, "codetools": { @@ -293,126 +293,126 @@ "Package": "colorspace", "Version": "2.0-0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "abea3384649ef37f60ef51ce002f3547" }, "commonmark": { "Package": "commonmark", "Version": "1.7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0f22be39ec1d141fd03683c06f3a6e67" }, "corpcor": { "Package": "corpcor", "Version": "1.6.9", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ae01381679f4511ca7a72d55fe175213" }, "corrr": { "Package": "corrr", "Version": "0.4.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "dbd1387c025b07f62da3334942176e14" }, "cpp11": { "Package": "cpp11", "Version": "0.2.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ba66e5a750d39067d888aa7af797fed2" }, "crayon": { "Package": "crayon", "Version": "1.3.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0d57bc8e27b7ba9e45dba825ebc0de6b" }, "curl": { "Package": "curl", "Version": "4.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2b7d10581cc730804e9ed178c8374bd6" }, "d3Network": { "Package": "d3Network", "Version": "0.5.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "83e3a9632a16bd3a8762b5c8919af568" }, "data.table": { "Package": "data.table", "Version": "1.14.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d1b8b1a821ee564a3515fa6c6d5c52dc" }, "dbplyr": { "Package": "dbplyr", "Version": "2.1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1f37fa4ab2f5f7eded42f78b9a887182" }, "desc": { "Package": "desc", "Version": "1.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6c8fe8fa26a23b79949375d372c7b395" }, "diffobj": { "Package": "diffobj", 
"Version": "0.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "16533929cf545f3c9b796780cccf5eff" }, "digest": { "Package": "digest", "Version": "0.6.27", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a0cbe758a531d054b537d16dff4d58a1" }, "dplyr": { "Package": "dplyr", "Version": "1.0.5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d0d76c11ec807eb3f000eba4e3eb0f68" }, "dtplyr": { "Package": "dtplyr", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1e14e4c5b2814de5225312394bc316da" }, "e1071": { "Package": "e1071", "Version": "1.7-9", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "32885be243a29301c90d33db37c3aad8" }, "ellipsis": { "Package": "ellipsis", "Version": "0.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" }, "entropy": { "Package": "entropy", "Version": "1.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d24c26075b0cabde12c419424edc6242" }, "evaluate": { @@ -426,679 +426,679 @@ "Package": "fansi", "Version": "0.4.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7fce217eaaf8016e72065e85c73027b5" }, "farver": { "Package": "farver", "Version": "2.0.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "dad6793a5a1f73c8e91f1a1e3e834b05" }, "fastmap": { "Package": "fastmap", "Version": "1.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "83ab58a0518afe3d17e41da01af13b60" }, "fdrtool": { "Package": "fdrtool", "Version": "1.2.15", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "eb8d30ac5bae2835a21d9e229195d8dd" }, "forcats": { "Package": "forcats", "Version": "0.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1cb4279e697650f0bd78cd3601ee7576" }, "foreach": { "Package": "foreach", "Version": "1.5.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "e32cfc0973caba11b65b1fa691b4d8c9" }, "foreign": { "Package": "foreign", "Version": "0.8-80", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ae1b1e15cc6ccb2bc61c0ac33e86d35f" }, "fs": { "Package": "fs", "Version": "1.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "44594a07a42e5f91fac9f93fda6d0109" }, "future": { "Package": "future", "Version": "1.23.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7bf6fbed7f00cae876901fd70c04f3a4" }, "future.apply": { "Package": "future.apply", "Version": "1.8.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "f568ce73d3d59582b0f7babd0eb33d07" }, "gclus": { "Package": "gclus", "Version": "1.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "8f905e7f70ac42e5b0e39d6ba360dd44" }, "generics": { "Package": "generics", "Version": "0.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4d243a9c10b00589889fe32314ffd902" }, "ggforce": { "Package": "ggforce", "Version": "0.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4adaf6b01b41f243ba0c62801de3331e" }, "ggplot2": { "Package": "ggplot2", "Version": "3.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": 
"RSPM", "Hash": "4ded8b439797f7b1693bd3d238d0106b" }, "ggraph": { "Package": "ggraph", "Version": "2.0.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2a189e5cd2bbfe687ed631be27c07462" }, "ggrepel": { "Package": "ggrepel", "Version": "0.9.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2cde657b5a57bf20ccdc8ec05fe69556" }, "glasso": { "Package": "glasso", "Version": "1.11", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1e1217c1b472d1dbffda819b57dc6d8d" }, "glmnet": { "Package": "glmnet", "Version": "4.0-2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "056100917c75e6f8a0d36f8bdd552799" }, "globals": { "Package": "globals", "Version": "0.14.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "eca8023ed5ca6372479ebb9b3207f5ae" }, "glue": { "Package": "glue", "Version": "1.4.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6efd734b14c6471cfe443345f3e35e29" }, "gower": { "Package": "gower", "Version": "0.2.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "be6a2b3529928bd803d1c437d1d43152" }, "graphlayouts": { "Package": "graphlayouts", "Version": "0.7.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2a0d18b9395cd27066d209b83bb29ea6" }, "gridExtra": { "Package": "gridExtra", "Version": "2.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7d7f283939f563670a697165b2cf5560" }, "gtable": { "Package": "gtable", "Version": "0.3.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ac5c6baf7822ce8732b343f14c072c4d" }, "gtools": { "Package": "gtools", "Version": "3.8.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0a749b4458d19a54acae93c64e3e7c85" }, "haven": { "Package": "haven", "Version": "2.3.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "221d0ad75dfa03ebf17b1a4cc5c31dfc" }, "highr": { "Package": "highr", "Version": "0.8", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4dc5bb88961e347a0f4d8aad597cbfac" }, "hms": { "Package": "hms", "Version": "1.1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" }, "htmlTable": { "Package": "htmlTable", "Version": "2.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4f2a742fc75dedafe660e3b47a37680e" }, "htmltools": { "Package": "htmltools", "Version": "0.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7d651b7131794fe007b1ad6f21aaa401" }, "htmlwidgets": { "Package": "htmlwidgets", "Version": "1.5.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6fdaa86d0700f8b3e92ee3c445a5a10d" }, "httpuv": { "Package": "httpuv", "Version": "1.5.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4e6dabb220b006ccdc3b3b5ff993b205" }, "httr": { "Package": "httr", "Version": "1.4.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a525aba14184fec243f9eaec62fbed43" }, "huge": { "Package": "huge", "Version": "1.3.4.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a4cde4dd1d2551edb99a3273a4ad34ea" }, "igraph": { "Package": "igraph", "Version": "1.2.6", "Source": 
"Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7b1f856410253d56ea67ad808f7cdff6" }, "influxdbr": { "Package": "influxdbr", "Version": "0.14.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "fb3d0730f3a6de3b9c09081910cca9bc" }, "ipred": { "Package": "ipred", "Version": "0.9-12", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "8312ebd8121ad2eca1c76441040bee5d" }, "isoband": { "Package": "isoband", "Version": "0.2.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "53647fb507373700028b2ce6cd30751a" }, "iterators": { "Package": "iterators", "Version": "1.0.13", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "64778782a89480e9a644f69aad9a2877" }, "jpeg": { "Package": "jpeg", "Version": "0.1-8.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bc316c003aba520fc73d70ad53b5fc36" }, "jsonlite": { "Package": "jsonlite", "Version": "1.7.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "98138e0994d41508c7a6b84a0600cfcb" }, "knitr": { "Package": "knitr", "Version": "1.30", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "eed7ee0d02eee88d53881cdc92457c62" }, "labeling": { "Package": "labeling", "Version": "0.4.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "3d5108641f47470611a32d0bdf357a72" }, "later": { "Package": "later", "Version": "1.1.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d0a62b247165aabf397fded504660d8a" }, "lattice": { "Package": "lattice", "Version": "0.20-41", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "fbd9285028b0263d76d18c95ae51a53d" }, "latticeExtra": { "Package": "latticeExtra", "Version": "0.6-29", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "590829599d6182cf7461787af34666ee" }, "lava": { "Package": "lava", "Version": "1.6.10", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4c31a28528978d8689145f5274ce9058" }, "lavaan": { "Package": "lavaan", "Version": "0.6-7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6cc8bb26e9deab15394d48d5ded61ce2" }, "lifecycle": { "Package": "lifecycle", "Version": "1.0.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "3471fb65971f1a7b2d4ae7848cf2db8d" }, "listenv": { "Package": "listenv", "Version": "0.8.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0bde42ee282efb18c7c4e63822f5b4f7" }, "lubridate": { "Package": "lubridate", "Version": "1.7.9.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "5b5b02f621d39a499def7923a5aee746" }, "magrittr": { "Package": "magrittr", "Version": "2.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "41287f1ac7d28a92f0a286ed507928d3" }, "markdown": { "Package": "markdown", "Version": "1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "61e4a10781dd00d7d81dd06ca9b94e95" }, "matrixcalc": { "Package": "matrixcalc", "Version": "1.0-3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "67101e7448dfd9add4ac418623060262" }, "mgcv": { "Package": "mgcv", "Version": "1.8-33", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": 
"eb7b6439bc6d812eed2cddba5edc6be3" }, "mgm": { "Package": "mgm", "Version": "1.2-10", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6a82e040f714adcd21a249d718dee637" }, "mime": { "Package": "mime", "Version": "0.9", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "e87a35ec73b157552814869f45a63aa3" }, "mnormt": { "Package": "mnormt", "Version": "2.0.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7e6ee68a01d6c87c69087d4a250ee763" }, "modelr": { "Package": "modelr", "Version": "0.1.8", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9fd59716311ee82cba83dc2826fc5577" }, "munsell": { "Package": "munsell", "Version": "0.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6dfe8bf774944bd5595785e3229d8771" }, "nlme": { "Package": "nlme", "Version": "3.1-151", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "42c8ba2b6a32a6bf0874e93e3bd86a43" }, "nnet": { "Package": "nnet", "Version": "7.3-14", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "0d87e50e11394a7151a28873637d799a" }, "numDeriv": { "Package": "numDeriv", "Version": "2016.8-1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "df58958f293b166e4ab885ebcad90e02" }, "openssl": { "Package": "openssl", "Version": "1.4.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a399e4773075fc2375b71f45fca186c4" }, "pROC": { "Package": "pROC", "Version": "1.18.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "417fd0d40479932c19faf2747817c473" }, "packrat": { "Package": "packrat", "Version": "0.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2ebd34a38f4248281096cc723535b66d" }, "parallelly": { "Package": "parallelly", "Version": "1.29.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b5f399c9ce96977e22ef32c20b6cfe87" }, "pbapply": { "Package": "pbapply", "Version": "1.4-3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "52f8028b028076bc3b7ee5d6251abf0d" }, "pbivnorm": { "Package": "pbivnorm", "Version": "0.6.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "643e16a7da6aac3e18cadc3e14abb94b" }, "pillar": { "Package": "pillar", "Version": "1.4.7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "3b3dd89b2ee115a8b54e93a34cd546b4" }, "pkgbuild": { "Package": "pkgbuild", "Version": "1.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "725fcc30222d4d11ec68efb8ff11a9af" }, "pkgconfig": { "Package": "pkgconfig", "Version": "2.0.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "01f28d4278f15c76cddbea05899c5d6f" }, "pkgload": { "Package": "pkgload", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b6b150cd4709e0c0c9b5d51ac4376282" }, "plogr": { "Package": "plogr", "Version": "0.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "09eb987710984fc2905c7129c7d85e65" }, "plyr": { "Package": "plyr", "Version": "1.8.6", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ec0e5ab4e5f851f6ef32cd1d1984957f" }, "png": { "Package": "png", "Version": "0.1-7", "Source": "Repository", - 
"Repository": "CRAN", + "Repository": "RSPM", "Hash": "03b7076c234cb3331288919983326c55" }, "polyclip": { "Package": "polyclip", "Version": "1.10-0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "cb167f328b3ada4ec5cf67a7df4c900a" }, "praise": { "Package": "praise", "Version": "1.0.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a555924add98c99d2f411e37e7d25e9f" }, "prettyunits": { "Package": "prettyunits", "Version": "1.1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" }, "processx": { "Package": "processx", "Version": "3.4.5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "22aab6098cb14edd0a5973a8438b569b" }, "prodlim": { "Package": "prodlim", "Version": "2019.11.13", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "c243bf70db3a6631a0c8783152fb7db9" }, "progress": { "Package": "progress", "Version": "1.2.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" }, "progressr": { "Package": "progressr", "Version": "0.9.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ca0d80ecc29903f7579edbabd91f4199" }, "promises": { "Package": "promises", "Version": "1.1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a8730dcbdd19f9047774909f0ec214a4" }, "proxy": { "Package": "proxy", "Version": "0.4-26", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "50b405c6419e921b9e9360cc9ebbcf2d" }, "ps": { "Package": "ps", "Version": "1.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ebaed51a03411fd5cfc1e12d9079b353" }, "psych": { "Package": "psych", "Version": "2.0.12", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9345dffe01b539d1c0b00eef80c51adf" }, "purrr": { "Package": "purrr", "Version": "0.3.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "97def703420c8ab10d8f0e6c72101e02" }, "qap": { "Package": "qap", "Version": "0.1-1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "753e69deffc32851121955ea150ecf8d" }, "qgraph": { "Package": "qgraph", "Version": "1.6.5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "d35964686307333a7121eb41c7dcd4e0" }, "rappdirs": { "Package": "rappdirs", "Version": "0.3.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "5e3c5dc0b071b21fa128676560dbe94d" }, "readr": { "Package": "readr", "Version": "1.4.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2639976851f71f330264a9c9c3d43a61" }, "readxl": { "Package": "readxl", "Version": "1.3.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "63537c483c2dbec8d9e3183b3735254a" }, "recipes": { "Package": "recipes", "Version": "0.1.17", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "443951ef5d9e72a96405cbb0157bb1d4" }, "registry": { "Package": "registry", "Version": "0.5-1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "1c9935f4f14c6c096c9c9072ddee59f1" }, "rematch": { "Package": "rematch", "Version": "1.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "c66b930d20bb6d858cd18e1cebcfae5c" }, 
"rematch2": { "Package": "rematch2", "Version": "2.1.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "76c9e04c712a05848ae7a23d2f170a40" }, "renv": { @@ -1112,133 +1112,133 @@ "Package": "reprex", "Version": "0.3.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b06bfb3504cc8a4579fd5567646f745b" }, "reshape2": { "Package": "reshape2", "Version": "1.4.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bb5996d0bd962d214a11140d77589917" }, "reticulate": { "Package": "reticulate", "Version": "1.18", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "fbd35cac6ae7554d0e4f440bca1adf3a" }, "rjson": { "Package": "rjson", "Version": "0.2.20", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7d597f982ee6263716b6a2f28efd29fa" }, "rlang": { "Package": "rlang", "Version": "0.4.10", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "599df23c40a4fce9c7b4764f28c37857" }, "rmarkdown": { "Package": "rmarkdown", "Version": "2.6", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bc4bac38960b446c183957bfd563e763" }, "robustbase": { "Package": "robustbase", "Version": "0.93-6", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "241454e22c9d26b11669b78d06706b41" }, "rpart": { "Package": "rpart", "Version": "4.1-15", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "9787c1fcb680e655d062e7611cadf78e" }, "rprojroot": { "Package": "rprojroot", "Version": "2.0.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "249d8cd1e74a8f6a26194a91b47f21d1" }, "rstudioapi": { "Package": "rstudioapi", "Version": "0.13", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "06c85365a03fdaf699966cc1d3cf53ea" }, "rvest": { "Package": "rvest", "Version": "0.3.6", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a9795ccb2d608330e841998b67156764" }, "scales": { "Package": "scales", "Version": "1.1.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6f76f71042411426ec8df6c54f34e6dd" }, "selectr": { "Package": "selectr", "Version": "0.4-2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "3838071b66e0c566d55cc26bd6e27bf4" }, "seriation": { "Package": "seriation", "Version": "1.3.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "76ad2df9a7784e78dfaebe1ac9e295c3" }, "shape": { "Package": "shape", "Version": "1.4.5", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "58510f25472de6fd363d76698d29709e" }, "shiny": { "Package": "shiny", "Version": "1.5.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ee4ed72d7a5047d9e73cf922ad66e9c9" }, "sourcetools": { "Package": "sourcetools", "Version": "0.1.7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "947e4e02a79effa5d512473e10f41797" }, "spatial": { "Package": "spatial", "Version": "7.3-12", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "58a02ce0150652b96c044bc67a0df2e5" }, "stringi": { "Package": "stringi", "Version": "1.5.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a063ebea753c92910a4cca7b18bc1f05" }, "stringr": { @@ -1252,175 +1252,175 @@ "Package": 
"survival", "Version": "3.2-7", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "39c4ac6d22dad33db0ee37b40810ea12" }, "sys": { "Package": "sys", "Version": "3.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b227d13e29222b4574486cfcbde077fa" }, "testthat": { "Package": "testthat", "Version": "3.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "17826764cb92d8b5aae6619896e5a161" }, "tibble": { "Package": "tibble", "Version": "3.0.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "71dffd8544691c520dd8e41ed2d7e070" }, "tidygraph": { "Package": "tidygraph", "Version": "1.2.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "5375980d0787633ca62c265b28bedb41" }, "tidyr": { "Package": "tidyr", "Version": "1.1.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "c40b2d5824d829190f4b825f4496dfae" }, "tidyselect": { "Package": "tidyselect", "Version": "1.1.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6ea435c354e8448819627cf686f66e0a" }, "tidyverse": { "Package": "tidyverse", "Version": "1.3.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "bd51be662f359fa99021f3d51e911490" }, "timeDate": { "Package": "timeDate", "Version": "3043.102", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "fde4fc571f5f61978652c229d4713845" }, "tinytex": { "Package": "tinytex", "Version": "0.28", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ff73961a77fba606fbf944dc3468e5b9" }, "tmvnsim": { "Package": "tmvnsim", "Version": "1.0-2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "50470c7ed0cc8099cbc17b38f0ef621f" }, "tweenr": { "Package": "tweenr", "Version": "1.0.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "fc77eb5297507cccfa3349a606061030" }, "utf8": { "Package": "utf8", "Version": "1.1.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "4a5081acfb7b81a572e4384a7aaf2af1" }, "vctrs": { "Package": "vctrs", "Version": "0.3.8", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ecf749a1b39ea72bd9b51b76292261f1" }, "viridis": { "Package": "viridis", "Version": "0.5.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "6f6b49e5b3b5ee5a6d0c28bf1b4b9eb3" }, "viridisLite": { "Package": "viridisLite", "Version": "0.3.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ce4f6271baa94776db692f1cb2055bee" }, "waldo": { "Package": "waldo", "Version": "0.2.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "181d1a31b1ba2009ef20926f2ee0570c" }, "whisker": { "Package": "whisker", "Version": "0.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ca970b96d894e90397ed20637a0c1bbe" }, "withr": { "Package": "withr", "Version": "2.3.0", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "7307d79f58d1885b38c4f4f1a8cb19dd" }, "xfun": { "Package": "xfun", "Version": "0.19", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "a42372606cb76f34da9d090326e9f955" }, "xml2": { "Package": "xml2", "Version": "1.3.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": 
"d4d71a75dd3ea9eb5fa28cc21f9585e2" }, "xtable": { "Package": "xtable", "Version": "1.8-4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "b8acdf8af494d9ec19ccb2481a9b11c2" }, "xts": { "Package": "xts", "Version": "0.12.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "ca2fd4ad8ef78cca3aa2b30f992798a8" }, "yaml": { "Package": "yaml", "Version": "2.2.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "2826c5d9efb0a88f657c7a679c7106db" }, "zoo": { "Package": "zoo", "Version": "1.8-9", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Hash": "035d1c7c12593038c26fb1c2fd40c4d2" } } diff --git a/renv/activate.R b/renv/activate.R index d3b271d8..6841a967 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -14,9 +14,6 @@ local({ # signal that we're loading renv during R startup Sys.setenv("RENV_R_INITIALIZING" = "true") on.exit(Sys.unsetenv("RENV_R_INITIALIZING"), add = TRUE) - - if(grepl("Darwin", Sys.info()["sysname"], fixed = TRUE) & grepl("ARM64", Sys.info()["version"], fixed = TRUE)) # M1 Macs - Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo")) # signal that we've consented to use renv options(renv.consent = TRUE) diff --git a/rules/features.smk b/rules/features.smk index da0f62c0..dff56535 100644 --- a/rules/features.smk +++ b/rules/features.smk @@ -791,10 +791,25 @@ rule empatica_accelerometer_python_features: provider_key = "{provider_key}", sensor_key = "empatica_accelerometer" output: - "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv" + "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}.csv", + "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv" script: "../src/features/entry.py" +rule empatica_accelerometer_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_accelerometer", + provider_main = config["EMPATICA_ACCELEROMETER"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}.csv", + "data/interim/{pid}/empatica_accelerometer_features/z_empatica_accelerometer_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_accelerometer_r_features: input: sensor_data = "data/raw/{pid}/empatica_accelerometer_with_datetime.csv", @@ -817,7 +832,8 @@ rule empatica_heartrate_python_features: provider_key = "{provider_key}", sensor_key = "empatica_heartrate" output: - "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv" + "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}.csv", + "data/interim/{pid}/empatica_heartrate_features/empatica_heartrate_python_{provider_key}_windows.csv" script: "../src/features/entry.py" @@ -843,10 +859,25 @@ rule empatica_temperature_python_features: provider_key = "{provider_key}", sensor_key = "empatica_temperature" output: - "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv" + "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}.csv", + 
"data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv" script: "../src/features/entry.py" +rule empatica_temperature_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_temperature_features/empatica_temperature_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_temperature", + provider_main = config["EMPATICA_TEMPERATURE"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}.csv", + "data/interim/{pid}/empatica_temperature_features/z_empatica_temperature_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_temperature_r_features: input: sensor_data = "data/raw/{pid}/empatica_temperature_with_datetime.csv", @@ -869,10 +900,25 @@ rule empatica_electrodermal_activity_python_features: provider_key = "{provider_key}", sensor_key = "empatica_electrodermal_activity" output: - "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv" + "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}.csv", + "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv" script: "../src/features/entry.py" +rule empatica_electrodermal_activity_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_electrodermal_activity", + provider_main = config["EMPATICA_ELECTRODERMAL_ACTIVITY"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}.csv", + "data/interim/{pid}/empatica_electrodermal_activity_features/z_empatica_electrodermal_activity_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_electrodermal_activity_r_features: input: sensor_data = "data/raw/{pid}/empatica_electrodermal_activity_with_datetime.csv", @@ -895,10 +941,25 @@ rule empatica_blood_volume_pulse_python_features: provider_key = "{provider_key}", sensor_key = "empatica_blood_volume_pulse" output: - "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv" + "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}.csv", + "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv" script: "../src/features/entry.py" +rule empatica_blood_volume_pulse_python_cr_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_blood_volume_pulse", + provider_main = config["EMPATICA_BLOOD_VOLUME_PULSE"]["PROVIDERS"]["CR"] + output: + 
"data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}.csv", + "data/interim/{pid}/empatica_blood_volume_pulse_features/z_empatica_blood_volume_pulse_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_blood_volume_pulse_r_features: input: sensor_data = "data/raw/{pid}/empatica_blood_volume_pulse_with_datetime.csv", @@ -921,10 +982,25 @@ rule empatica_inter_beat_interval_python_features: provider_key = "{provider_key}", sensor_key = "empatica_inter_beat_interval" output: - "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv" + "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}.csv", + "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv" script: "../src/features/entry.py" +rule empatica_inter_beat_interval_python_features_standardization: + input: + windows_features_data = "data/interim/{pid}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_{provider_key}_windows.csv" + params: + provider = config["STANDARDIZATION"]["PROVIDERS"]["CR"], + provider_key = "{provider_key}", + sensor_key = "empatica_inter_beat_interval", + provider_main = config["EMPATICA_INTER_BEAT_INTERVAL"]["PROVIDERS"]["CR"] + output: + "data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}.csv", + "data/interim/{pid}/empatica_inter_beat_interval_features/z_empatica_inter_beat_interval_python_{provider_key}_windows.csv" + script: + "../src/features/standardization/main.py" + rule empatica_inter_beat_interval_r_features: input: sensor_data = "data/raw/{pid}/empatica_inter_beat_interval_with_datetime.csv", diff --git a/rules/preprocessing.smk b/rules/preprocessing.smk index 83608204..07ffc390 100644 --- a/rules/preprocessing.smk +++ b/rules/preprocessing.smk @@ -4,36 +4,6 @@ rule create_example_participant_files: shell: "echo 'PHONE:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n PLATFORMS: [android]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [a748ee1a-1d0b-4ae9-9074-279a2b6ba524]\n LABEL: test-01\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example01.yaml && echo 'PHONE:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n PLATFORMS: [ios]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\nFITBIT:\n DEVICE_IDS: [13dbc8a3-dae3-4834-823a-4bc96a7d459d]\n LABEL: test-02\n START_DATE: 2020-04-23 00:00:00\n END_DATE: 2020-05-04 23:59:59\n' >> ./data/external/participant_files/example02.yaml" -rule query_usernames_device_empatica_ids: - params: - baseline_folder = "/mnt/e/STRAWbaseline/" - output: - usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"], - timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"] - script: - "../../participants/prepare_usernames_file.py" - -rule prepare_tzcodes_file: - input: - timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"] - output: - tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"] - script: - "../tools/create_multi_timezones_file.py" - -rule prepare_participants_csv: - input: - username_list = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"] - params: - data_configuration = 
config["PHONE_DATA_STREAMS"][config["PHONE_DATA_STREAMS"]["USE"]], - participants_table = "participants", - device_id_table = "esm", - start_end_date_table = "esm" - output: - participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"] - script: - "../src/data/translate_usernames_into_participants_data.R" - rule create_participants_files: input: participants_file = config["CREATE_PARTICIPANT_FILES"]["CSV_FILE_PATH"] @@ -248,4 +218,4 @@ rule empatica_readable_datetime: output: "data/raw/{pid}/empatica_{sensor}_with_datetime.csv" script: - "../src/data/datetime/readable_datetime.R" + "../src/data/datetime/readable_datetime.R" \ No newline at end of file diff --git a/src/data/streams/empatica_zip/container.py b/src/data/streams/empatica_zip/container.py index 542acdcd..93a6e5d8 100644 --- a/src/data/streams/empatica_zip/container.py +++ b/src/data/streams/empatica_zip/container.py @@ -2,11 +2,16 @@ from zipfile import ZipFile import warnings from pathlib import Path import pandas as pd +import numpy as np from pandas.core import indexing import yaml import csv from collections import OrderedDict from io import BytesIO, StringIO +import sys, os + +from cr_features.hrv import get_HRV_features, get_patched_ibi_with_bvp +from cr_features.helper_functions import empatica1d_to_array, empatica2d_to_array def processAcceleration(x, y, z): x = float(x) @@ -62,13 +67,18 @@ def extract_empatica_data(data, sensor): df.index.name = 'timestamp' elif sensor == 'EMPATICA_INTER_BEAT_INTERVAL': - df = pd.read_csv(sensor_data_file, names=['timestamp', column], header=None) + + df = pd.read_csv(sensor_data_file, names=['timings', column], header=None) + df['timestamp'] = df['timings'] + if df.empty: + df = df.set_index('timestamp') + return df timestampstart = float(df['timestamp'][0]) - df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart + df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart df = df.drop([0]) df[column] = df[column].astype(float) df = df.set_index('timestamp') - + else: raise ValueError( "sensor has an invalid name: {}".format(sensor)) @@ -84,6 +94,10 @@ def pull_data(data_configuration, device, sensor, container, columns_to_download participant_data = pd.DataFrame(columns=columns_to_download.values()) participant_data.set_index('timestamp', inplace=True) + with open('config.yaml', 'r') as stream: + config = yaml.load(stream, Loader=yaml.FullLoader) + cr_ibi_provider = config['EMPATICA_INTER_BEAT_INTERVAL']['PROVIDERS']['CR'] + available_zipfiles = list((Path(data_configuration["FOLDER"]) / Path(device)).rglob("*.zip")) if len(available_zipfiles) == 0: warnings.warn("There were no zip files in: {}. 
If you were expecting data for this participant, the [EMPATICA][DEVICE_IDS] key in their participant file is missing this pid".format((Path(data_configuration["FOLDER"]) / Path(device))))
@@ -94,7 +108,13 @@
        listOfFileNames = zipFile.namelist()
        for fileName in listOfFileNames:
            if fileName == sensor_csv:
-                participant_data = pd.concat([participant_data, extract_empatica_data(zipFile.read(fileName), sensor)], axis=0)
+                if sensor == "EMPATICA_INTER_BEAT_INTERVAL" and cr_ibi_provider.get('PATCH_WITH_BVP', False):
+                    participant_data = \
+                        pd.concat([participant_data, patch_ibi_with_bvp(zipFile.read('IBI.csv'), zipFile.read('BVP.csv'))], axis=0)
+                else:
+                    participant_data = pd.concat([participant_data, extract_empatica_data(zipFile.read(fileName), sensor)], axis=0)
                warning = False
    if warning:
        warnings.warn("We could not find a zipped file for {} in {} (we tried to find {})".format(sensor, zipFile, sensor_csv))
@@ -105,4 +125,53 @@
    participant_data["device_id"] = device
    return(participant_data)

+def patch_ibi_with_bvp(ibi_data, bvp_data):
+    ibi_data_file = BytesIO(ibi_data).getvalue().decode('utf-8')
+    ibi_data_file = StringIO(ibi_data_file)
+
+    # Parse the raw IBI csv into arrays with the cr-features helpers
+    try:
+        ibi_data, ibi_start_timestamp = empatica2d_to_array(ibi_data_file)
+    except IndexError as e:
+        # Check whether IBI.csv is empty; if so, return an empty, correctly indexed dataframe
+        df_test = pd.read_csv(ibi_data_file, names=['timings', 'inter_beat_interval'], header=None)
+        if df_test.empty:
+            df_test['timestamp'] = df_test['timings']
+            df_test = df_test.set_index('timestamp')
+            return df_test
+        else:
+            raise IndexError("Something went wrong with indices. 
Error that was previously caught:\n", repr(e))
+
+    bvp_data_file = BytesIO(bvp_data).getvalue().decode('utf-8')
+    bvp_data_file = StringIO(bvp_data_file)
+
+    bvp_data, bvp_start_timestamp, sample_rate = empatica1d_to_array(bvp_data_file)
+
+    hrv_time_and_freq_features, sample, bvp_rr, bvp_timings, peak_indx = \
+        get_HRV_features(bvp_data, ma=False,
+            detrend=False, m_deternd=False, low_pass=False, winsorize=True,
+            winsorize_value=25, hampel_fiter=False, median_filter=False,
+            mod_z_score_filter=True, sampling=64, feature_names=['meanHr'])
+
+    ibi_timings, ibi_rr = get_patched_ibi_with_bvp(ibi_data[0], ibi_data[1], bvp_timings, bvp_rr)
+
+    df = \
+        pd.DataFrame(np.array([ibi_timings, ibi_rr]).transpose(), columns=['timestamp', 'inter_beat_interval'])
+    df.loc[-1] = [ibi_start_timestamp, 'IBI']  # adding a row
+    df.index = df.index + 1  # shifting index
+    df = df.sort_index()  # sorting by index
+
+    # Repeated as in extract_empatica_data for IBI
+    df['timings'] = df['timestamp']
+    timestampstart = float(df['timestamp'][0])
+    df['timestamp'] = (df['timestamp'][1:len(df)]).astype(float) + timestampstart
+    df = df.drop([0])
+    df['inter_beat_interval'] = df['inter_beat_interval'].astype(float)
+    df = df.set_index('timestamp')
+
+    # convert the index to integer millisecond timestamps
+    df.index *= 1000
+    df.index = df.index.astype(int)
+    return df
+
+# print(pull_data({'FOLDER': 'data/external/empatica'}, "e01", "EMPATICA_accelerometer", {'TIMESTAMP': 'timestamp', 'DEVICE_ID': 'device_id', 'DOUBLE_VALUES_0': 'x', 'DOUBLE_VALUES_1': 'y', 'DOUBLE_VALUES_2': 'z'}))
\ No newline at end of file
diff --git a/src/data/streams/empatica_zip/format.yaml b/src/data/streams/empatica_zip/format.yaml
index d8f0aac9..90180c13 100644
--- a/src/data/streams/empatica_zip/format.yaml
+++ b/src/data/streams/empatica_zip/format.yaml
@@ -50,6 +50,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
    TIMESTAMP: timestamp
    DEVICE_ID: device_id
    INTER_BEAT_INTERVAL: inter_beat_interval
+    TIMINGS: timings
  MUTATION:
    COLUMN_MAPPINGS:
    SCRIPTS: # List any python or r scripts that mutate your raw data
diff --git a/src/data/streams/rapids_columns.yaml b/src/data/streams/rapids_columns.yaml
index 09b67566..94faddee 100644
--- a/src/data/streams/rapids_columns.yaml
+++ b/src/data/streams/rapids_columns.yaml
@@ -227,6 +227,7 @@ EMPATICA_INTER_BEAT_INTERVAL:
  - TIMESTAMP
  - DEVICE_ID
  - INTER_BEAT_INTERVAL
+  - TIMINGS

 EMPATICA_TAGS:
  - TIMESTAMP
diff --git a/src/features/__init__.py b/src/features/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/features/cr_features_helper_methods.py b/src/features/cr_features_helper_methods.py
new file mode 100644
index 00000000..9e96c497
--- /dev/null
+++ b/src/features/cr_features_helper_methods.py
@@ -0,0 +1,59 @@
+import pandas as pd
+import numpy as np
+import math as m
+
+import sys
+
+def extract_second_order_features(intraday_features, so_features_names, prefix=""):
+
+    if prefix:
+        groupby_cols = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime']
+    else:
+        groupby_cols = ['local_segment']
+
+    if not intraday_features.empty:
+        so_features = pd.DataFrame()
+        if "mean" in so_features_names:
+            so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).mean().add_suffix("_SO_mean")], axis=1)
+
+        if "median" in so_features_names:
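+            # each second-order feature aggregates a first-order (per-window) feature over all windows in a time segment
+            so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", 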
axis=1).groupby(groupby_cols).median().add_suffix("_SO_median")], axis=1)
+
+        if "sd" in so_features_names:
+            so_features = pd.concat([so_features, intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols).std().add_suffix("_SO_sd")], axis=1)
+
+        if "nlargest" in so_features_names:  # mean of the largest 5 -- maybe there is a faster groupby solution?
+            for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
+                so_features[column+"_SO_nlargest"] = intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols)[column].apply(lambda x: x.nlargest(5).mean())
+
+        if "nsmallest" in so_features_names:  # mean of the smallest 5 -- maybe there is a faster groupby solution?
+            for column in intraday_features.loc[:, ~intraday_features.columns.isin(groupby_cols+[prefix+"level_1"])]:
+                so_features[column+"_SO_nsmallest"] = intraday_features.drop(prefix+"level_1", axis=1).groupby(groupby_cols)[column].apply(lambda x: x.nsmallest(5).mean())
+
+        if "count_windows" in so_features_names:
+            so_features["SO_windowsCount"] = intraday_features.groupby(groupby_cols).count()[prefix+"level_1"]
+
+        # numPeaksNonZero specialized for the EDA sensor
+        if "eda_num_peaks_non_zero" in so_features_names and prefix+"numPeaks" in intraday_features.columns:
+            so_features[prefix+"SO_numPeaksNonZero"] = intraday_features.groupby(groupby_cols)[prefix+"numPeaks"].apply(lambda x: (x!=0).sum())
+
+        # numWindowsNonNaN specialized for the BVP and IBI sensors
+        if "hrv_num_windows_non_nan" in so_features_names and prefix+"meanHr" in intraday_features.columns:
+            so_features[prefix+"SO_numWindowsNonNaN"] = intraday_features.groupby(groupby_cols)[prefix+"meanHr"].apply(lambda x: (~np.isnan(x)).sum())
+
+        so_features.reset_index(inplace=True)
+
+    else:
+        so_features = pd.DataFrame(columns=groupby_cols)
+
+    return so_features
+
+def get_sample_rate(data):  # TODO: get the sample rate information from the file's metadata instead
+    try:
+        timestamps_diff = data['timestamp'].diff().dropna().mean()
+        print("Timestamp diff:", timestamps_diff)
+    except Exception as e:
+        raise Exception("Error occurred while trying to get the mean sample rate from the data.") from e
+
+    return m.ceil(1000/timestamps_diff)
\ No newline at end of file
diff --git a/src/features/empatica_accelerometer/cr/main.py b/src/features/empatica_accelerometer/cr/main.py
new file mode 100644
index 00000000..77d18bfe
--- /dev/null
+++ b/src/features/empatica_accelerometer/cr/main.py
@@ -0,0 +1,71 @@
+import pandas as pd
+from scipy.stats import entropy
+
+from cr_features.helper_functions import convert_to2d, accelerometer_features, frequency_features
+from cr_features.calculate_features_old import calculateFeatures
+from cr_features.calculate_features import calculate_features
+from cr_features_helper_methods import extract_second_order_features
+
+import sys
+
+def extract_acc_features_from_intraday_data(acc_intraday_data, features, window_length, time_segment, filter_data_by_segment):
+    acc_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
+
+    if not acc_intraday_data.empty:
+        sample_rate = 32
+
+        acc_intraday_data = filter_data_by_segment(acc_intraday_data, time_segment)
+
+        if not acc_intraday_data.empty:
+
+            acc_intraday_features = pd.DataFrame()
+
+            # apply methods from calculate features module
+            if window_length is None:
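+                # no window length given: treat the whole time segment as a single window
+                acc_intraday_features = \
+                    acc_intraday_data.groupby('local_segment').apply(lambda x: calculate_features( \
+                        convert_to2d(x['double_values_0'], x.shape[0]), \
+                        convert_to2d(x['double_values_1'], x.shape[0]), \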
convert_to2d(x['double_values_2'], x.shape[0]), \ + fs=sample_rate, feature_names=features, show_progress=False)) + else: + acc_intraday_features = \ + acc_intraday_data.groupby('local_segment').apply(lambda x: calculate_features( \ + convert_to2d(x['double_values_0'], window_length*sample_rate), \ + convert_to2d(x['double_values_1'], window_length*sample_rate), \ + convert_to2d(x['double_values_2'], window_length*sample_rate), \ + fs=sample_rate, feature_names=features, show_progress=False)) + + acc_intraday_features.reset_index(inplace=True) + + return acc_intraday_features + + + +def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + acc_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) + + requested_intraday_features = provider["FEATURES"] + + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: + requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] + else: + requested_window_length = None + + # name of the features this function can compute + base_intraday_features_names = accelerometer_features + frequency_features + # the subset of requested features this function can compute + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) + + # extract features from intraday data + acc_intraday_features = extract_acc_features_from_intraday_data(acc_intraday_data, intraday_features_to_compute, + requested_window_length, time_segment, filter_data_by_segment) + + if calc_windows: + so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"] + acc_second_order_features = extract_second_order_features(acc_intraday_features, so_features_names) + return acc_intraday_features, acc_second_order_features + + return acc_intraday_features \ No newline at end of file diff --git a/src/features/empatica_blood_volume_pulse/cr/main.py b/src/features/empatica_blood_volume_pulse/cr/main.py new file mode 100644 index 00000000..1e7df9b2 --- /dev/null +++ b/src/features/empatica_blood_volume_pulse/cr/main.py @@ -0,0 +1,73 @@ +import pandas as pd +from sklearn.preprocessing import StandardScaler + +from cr_features.helper_functions import convert_to2d, hrv_features +from cr_features.hrv import extract_hrv_features_2d_wrapper +from cr_features_helper_methods import extract_second_order_features + +import sys + +# pd.set_option('display.max_rows', 1000) +pd.set_option('display.max_columns', None) + +def extract_bvp_features_from_intraday_data(bvp_intraday_data, features, window_length, time_segment, filter_data_by_segment): + bvp_intraday_features = pd.DataFrame(columns=["local_segment"] + features) + + if not bvp_intraday_data.empty: + sample_rate = 64 + + bvp_intraday_data = filter_data_by_segment(bvp_intraday_data, time_segment) + + if not bvp_intraday_data.empty: + + bvp_intraday_features = pd.DataFrame() + + # apply methods from calculate features module + if window_length is None: + bvp_intraday_features = \ + bvp_intraday_data.groupby('local_segment').apply(\ + lambda x: + extract_hrv_features_2d_wrapper( + convert_to2d(x['blood_volume_pulse'], x.shape[0]), + sampling=sample_rate, hampel_fiter=False, median_filter=False, mod_z_score_filter=True, feature_names=features)) + + else: + bvp_intraday_features = \ + bvp_intraday_data.groupby('local_segment').apply(\ + lambda x: + extract_hrv_features_2d_wrapper( + convert_to2d(x['blood_volume_pulse'], window_length*sample_rate), + sampling=sample_rate, hampel_fiter=False, 
median_filter=False, mod_z_score_filter=True, feature_names=features)) + + bvp_intraday_features.reset_index(inplace=True) + + return bvp_intraday_features + + +def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + bvp_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) + + requested_intraday_features = provider["FEATURES"] + + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: + requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] + else: + requested_window_length = None + + # name of the features this function can compute + base_intraday_features_names = hrv_features + # the subset of requested features this function can compute + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) + + # extract features from intraday data + bvp_intraday_features = extract_bvp_features_from_intraday_data(bvp_intraday_data, intraday_features_to_compute, + requested_window_length, time_segment, filter_data_by_segment) + + if calc_windows: + so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"] + bvp_second_order_features = extract_second_order_features(bvp_intraday_features, so_features_names) + return bvp_intraday_features, bvp_second_order_features + + return bvp_intraday_features \ No newline at end of file diff --git a/src/features/empatica_electrodermal_activity/cr/main.py b/src/features/empatica_electrodermal_activity/cr/main.py new file mode 100644 index 00000000..0b09f02b --- /dev/null +++ b/src/features/empatica_electrodermal_activity/cr/main.py @@ -0,0 +1,78 @@ +import pandas as pd +import numpy as np +from scipy.stats import entropy + +from cr_features.helper_functions import convert_to2d, gsr_features +from cr_features.calculate_features import calculate_features +from cr_features.gsr import extractGsrFeatures2D +from cr_features_helper_methods import extract_second_order_features + +import sys + +#pd.set_option('display.max_columns', None) +#pd.set_option('display.max_rows', None) +#np.seterr(invalid='ignore') + + +def extract_eda_features_from_intraday_data(eda_intraday_data, features, window_length, time_segment, filter_data_by_segment): + eda_intraday_features = pd.DataFrame(columns=["local_segment"] + features) + + if not eda_intraday_data.empty: + sample_rate = 4 + + eda_intraday_data = filter_data_by_segment(eda_intraday_data, time_segment) + + if not eda_intraday_data.empty: + + eda_intraday_features = pd.DataFrame() + + # apply methods from calculate features module + if window_length is None: + eda_intraday_features = \ + eda_intraday_data.groupby('local_segment').apply(\ + lambda x: extractGsrFeatures2D(convert_to2d(x['electrodermal_activity'], x.shape[0]), sampleRate=sample_rate, featureNames=features, + threshold=.01, offset=1, riseTime=5, decayTime=15)) + else: + eda_intraday_features = \ + eda_intraday_data.groupby('local_segment').apply(\ + lambda x: extractGsrFeatures2D(convert_to2d(x['electrodermal_activity'], window_length*sample_rate), sampleRate=sample_rate, featureNames=features, + threshold=.01, offset=1, riseTime=5, decayTime=15)) + + eda_intraday_features.reset_index(inplace=True) + + return eda_intraday_features + + +def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) + + requested_intraday_features = provider["FEATURES"] + + calc_windows = kwargs.get('calc_windows', 
False)
+
+    if provider["WINDOWS"]["COMPUTE"] and calc_windows:
+        requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"]
+    else:
+        requested_window_length = None
+
+    # name of the features this function can compute
+    base_intraday_features_names = gsr_features
+    # the subset of requested features this function can compute
+    intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names))
+
+    # extract features from intraday data
+    eda_intraday_features = extract_eda_features_from_intraday_data(eda_intraday_data, intraday_features_to_compute,
+                                                                    requested_window_length, time_segment, filter_data_by_segment)
+
+    if calc_windows:
+        if provider["WINDOWS"]["IMPUTE_NANS"]:
+            eda_intraday_features[eda_intraday_features["numPeaks"] == 0] = \
+                eda_intraday_features[eda_intraday_features["numPeaks"] == 0].fillna(0)
+
+        so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
+        eda_second_order_features = extract_second_order_features(eda_intraday_features, so_features_names)
+
+        return eda_intraday_features, eda_second_order_features
+
+    return eda_intraday_features
\ No newline at end of file
diff --git a/src/features/empatica_inter_beat_interval/cr/main.py b/src/features/empatica_inter_beat_interval/cr/main.py
new file mode 100644
index 00000000..803bf3a8
--- /dev/null
+++ b/src/features/empatica_inter_beat_interval/cr/main.py
@@ -0,0 +1,79 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+
+from cr_features.helper_functions import convert_ibi_to2d_time, hrv_features
+from cr_features.hrv import extract_hrv_features_2d_wrapper, get_HRV_features
+from cr_features_helper_methods import extract_second_order_features
+
+import math
+import sys
+
+# pd.set_option('display.max_rows', 1000)
+pd.set_option('display.max_columns', None)
+
+
+def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_length, time_segment, filter_data_by_segment):
+    ibi_intraday_features = pd.DataFrame(columns=["local_segment"] + features)
+
+    if not ibi_intraday_data.empty:
+
+        ibi_intraday_data = filter_data_by_segment(ibi_intraday_data, time_segment)
+
+        if not ibi_intraday_data.empty:
+
+            ibi_intraday_features = pd.DataFrame()
+
+            # apply methods from calculate features module
+            if window_length is None:
+                ibi_intraday_features = \
+                    ibi_intraday_data.groupby('local_segment').apply(\
+                        lambda x:
+                            extract_hrv_features_2d_wrapper(
+                                signal_2D = \
+                                    convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[0],
+                                ibi_timings = \
+                                    convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], math.ceil(x['timings'].iloc[-1]))[1],
+                                sampling=None, hampel_fiter=False, median_filter=False, mod_z_score_filter=True, feature_names=features))
+            else:
+                ibi_intraday_features = \
+                    ibi_intraday_data.groupby('local_segment').apply(\
+                        lambda x:
+                            extract_hrv_features_2d_wrapper(
+                                signal_2D = convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], window_length)[0],
+                                ibi_timings = convert_ibi_to2d_time(x[['timings', 'inter_beat_interval']], window_length)[1],
+                                sampling=None, hampel_fiter=False, median_filter=False, mod_z_score_filter=True, feature_names=features))
+
+            ibi_intraday_features.reset_index(inplace=True)
+
+    return ibi_intraday_features
+
+
+def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
+    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
+
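+    # entry.py passes calc_windows=True when per-window features and their second-order aggregates are requested
+    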
requested_intraday_features = provider["FEATURES"] + + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: + requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] + else: + requested_window_length = None + + # name of the features this function can compute + base_intraday_features_names = hrv_features + # the subset of requested features this function can compute + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) + + # extract features from intraday data + ibi_intraday_features = extract_ibi_features_from_intraday_data(ibi_intraday_data, intraday_features_to_compute, + requested_window_length, time_segment, filter_data_by_segment) + + if calc_windows: + so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"] + ibi_second_order_features = extract_second_order_features(ibi_intraday_features, so_features_names) + + return ibi_intraday_features, ibi_second_order_features + + return ibi_intraday_features \ No newline at end of file diff --git a/src/features/empatica_temperature/cr/main.py b/src/features/empatica_temperature/cr/main.py new file mode 100644 index 00000000..36e720bd --- /dev/null +++ b/src/features/empatica_temperature/cr/main.py @@ -0,0 +1,65 @@ +import pandas as pd +from scipy.stats import entropy + +from cr_features.helper_functions import convert_to2d, generic_features +from cr_features.calculate_features_old import calculateFeatures +from cr_features.calculate_features import calculate_features +from cr_features_helper_methods import extract_second_order_features + +import sys + +def extract_temp_features_from_intraday_data(temperature_intraday_data, features, window_length, time_segment, filter_data_by_segment): + temperature_intraday_features = pd.DataFrame(columns=["local_segment"] + features) + + if not temperature_intraday_data.empty: + sample_rate = 4 + + temperature_intraday_data = filter_data_by_segment(temperature_intraday_data, time_segment) + + if not temperature_intraday_data.empty: + + temperature_intraday_features = pd.DataFrame() + + # apply methods from calculate features module + if window_length is None: + temperature_intraday_features = \ + temperature_intraday_data.groupby('local_segment').apply(\ + lambda x: calculate_features(convert_to2d(x['temperature'], x.shape[0]), fs=sample_rate, feature_names=features, show_progress=False)) + else: + temperature_intraday_features = \ + temperature_intraday_data.groupby('local_segment').apply(\ + lambda x: calculate_features(convert_to2d(x['temperature'], window_length*sample_rate), fs=sample_rate, feature_names=features, show_progress=False)) + + + temperature_intraday_features.reset_index(inplace=True) + + return temperature_intraday_features + + +def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): + temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) + + requested_intraday_features = provider["FEATURES"] + + calc_windows = kwargs.get('calc_windows', False) + + if provider["WINDOWS"]["COMPUTE"] and calc_windows: + requested_window_length = provider["WINDOWS"]["WINDOW_LENGTH"] + else: + requested_window_length = None + + # name of the features this function can compute + base_intraday_features_names = generic_features + # the subset of requested features this function can compute + intraday_features_to_compute = list(set(requested_intraday_features) & set(base_intraday_features_names)) + + # extract features from 
intraday data
+    temperature_intraday_features = extract_temp_features_from_intraday_data(temperature_intraday_data, intraday_features_to_compute,
+                                                                             requested_window_length, time_segment, filter_data_by_segment)
+
+    if calc_windows:
+        so_features_names = provider["WINDOWS"]["SECOND_ORDER_FEATURES"]
+        temperature_second_order_features = extract_second_order_features(temperature_intraday_features, so_features_names)
+        return temperature_intraday_features, temperature_second_order_features
+
+    return temperature_intraday_features
\ No newline at end of file
diff --git a/src/features/entry.py b/src/features/entry.py
index 2f65f8ad..288ba168 100644
--- a/src/features/entry.py
+++ b/src/features/entry.py
@@ -1,12 +1,16 @@
 import pandas as pd
 from utils.utils import fetch_provider_features, run_provider_cleaning_script

+import sys
+
 sensor_data_files = dict(snakemake.input)

 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 sensor_key = snakemake.params["sensor_key"]

+calc_windows = bool(provider.get("WINDOWS", False) and provider["WINDOWS"].get("COMPUTE", False))
+
 if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall": # Data cleaning
     sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
@@ -14,6 +18,18 @@ else: # Extract sensor features
     del sensor_data_files["time_segments_labels"]
     time_segments_file = snakemake.input["time_segments_labels"]
-    sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file)
-sensor_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file
+    if calc_windows:
+        window_features, second_order_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=True)
+
+        window_features.to_csv(snakemake.output[1], index=False)
+        second_order_features.to_csv(snakemake.output[0], index=False)
+
+    elif "empatica" in sensor_key:
+        pd.DataFrame().to_csv(snakemake.output[1], index=False)
+
+    if not calc_windows:
+        sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False)
+
+if not calc_windows:
+    sensor_features.to_csv(snakemake.output[0], index=False)
diff --git a/src/features/standardization/main.py b/src/features/standardization/main.py
new file mode 100644
index 00000000..b6026d0b
--- /dev/null
+++ b/src/features/standardization/main.py
@@ -0,0 +1,37 @@
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+
+import sys
+
+sensor_data_files = dict(snakemake.input)
+
+provider = snakemake.params["provider"]
+provider_key = snakemake.params["provider_key"]
+sensor_key = snakemake.params["sensor_key"]
+
+pd.set_option('display.max_columns', None)
+
+if provider_key == "cr":
+    sys.path.append('/rapids/src/features/')
+    from cr_features_helper_methods import extract_second_order_features
+
+    provider_main = snakemake.params["provider_main"]
+    prefix = sensor_key + "_" + provider_key + "_"
+
+    windows_features_data = pd.read_csv(sensor_data_files["windows_features_data"])
+    excluded_columns = ['local_segment', 'local_segment_label', 'local_segment_start_datetime', 'local_segment_end_datetime', prefix + "level_1"]
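+    # z-score each feature column over all windows; the segment metadata columns are excluded from scaling
+    windows_features_data.loc[:, ~windows_features_data.columns.isin(excluded_columns)] = \
+        StandardScaler().fit_transform(windows_features_data.loc[:, 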
~windows_features_data.columns.isin(excluded_columns)])
+
+    windows_features_data.to_csv(snakemake.output[1], index=False)
+
+    if provider_main["WINDOWS"]["COMPUTE"] and "SECOND_ORDER_FEATURES" in provider_main["WINDOWS"]:
+        so_features_names = provider_main["WINDOWS"]["SECOND_ORDER_FEATURES"]
+        windows_so_features_data = extract_second_order_features(windows_features_data, so_features_names, prefix)
+        windows_so_features_data.to_csv(snakemake.output[0], index=False)
+    else:
+        pd.DataFrame().to_csv(snakemake.output[0], index=False)
+
+else:
+    pass  # TODO: standardization for the rest of the sensors
\ No newline at end of file
diff --git a/src/features/utils/utils.py b/src/features/utils/utils.py
index d2986699..7303ac86 100644
--- a/src/features/utils/utils.py
+++ b/src/features/utils/utils.py
@@ -88,11 +88,13 @@
     return merged_sensor_episodes

-def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file):
+def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file, calc_windows=False):
     import pandas as pd
     from importlib import import_module, util

     sensor_features = pd.DataFrame(columns=["local_segment"])
+    sensor_fo_features = pd.DataFrame(columns=["local_segment"])
+    sensor_so_features = pd.DataFrame(columns=["local_segment"])
     time_segments_labels = pd.read_csv(time_segments_file, header=0)
     if "FEATURES" not in provider:
         raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))
@@ -106,23 +108,57 @@
             time_segments_labels["label"] = [""]
         for time_segment in time_segments_labels["label"]:
             print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
-            features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
-            if not "local_segment" in features.columns:
-                raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)")
-            features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns]
-            sensor_features = pd.concat([sensor_features, features], axis=0, sort=False)
+
+            features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes, calc_windows=calc_windows)
+
+            # when calc_windows=True the provider returns a (window features, second order features) tuple
+            if isinstance(features, tuple):
+                if not "local_segment" in features[0].columns or not "local_segment" in features[1].columns:
+                    raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. 
Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)") + features[0].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[0].columns] + features[1].columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features[1].columns] + if not features[0].empty: + sensor_fo_features = pd.concat([sensor_fo_features, features[0]], axis=0, sort=False) + if not features[1].empty: + sensor_so_features = pd.concat([sensor_so_features, features[1]], axis=0, sort=False) + else: + if not "local_segment" in features.columns: + raise ValueError("The dataframe returned by the " + sensor_key + " provider '" + provider_key + "' is missing the 'local_segment' column added by the 'filter_data_by_segment()' function. Check the provider script is using such function and is not removing 'local_segment' by accident (" + provider["SRC_SCRIPT"] + ")\n The 'local_segment' column is used to index a provider's features (each row corresponds to a different time segment instance (e.g. 2020-01-01, 2020-01-02, 2020-01-03, etc.)") + features.columns = ["{}{}".format("" if col.startswith("local_segment") else (sensor_key + "_"+ provider_key + "_"), col) for col in features.columns] + sensor_features = pd.concat([sensor_features, features], axis=0, sort=False) else: for feature in provider["FEATURES"]: sensor_features[feature] = None - segment_colums = pd.DataFrame() - sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '') - split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True) - new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"]) - segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns - for i in range(segment_colums.shape[1]): - sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]]) + + if calc_windows: + segment_colums = pd.DataFrame() + sensor_fo_features['local_segment'] = sensor_fo_features['local_segment'].str.replace(r'_RR\d+SS', '') + split_segemnt_columns = sensor_fo_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True) + new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"]) + segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns + for i in range(segment_colums.shape[1]): + sensor_fo_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]]) + + segment_colums = pd.DataFrame() + sensor_so_features['local_segment'] = sensor_so_features['local_segment'].str.replace(r'_RR\d+SS', '') + split_segemnt_columns = sensor_so_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True) + new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else 
pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"]) + segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns + for i in range(segment_colums.shape[1]): + sensor_so_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]]) - return sensor_features + return sensor_fo_features, sensor_so_features + + else: + segment_colums = pd.DataFrame() + sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '') + split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True) + new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"]) + segment_colums[["local_segment_label", "local_segment_start_datetime", "local_segment_end_datetime"]] = new_segment_columns + for i in range(segment_colums.shape[1]): + sensor_features.insert(1 + i, segment_colums.columns[i], segment_colums[segment_colums.columns[i]]) + + return sensor_features def run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files): from importlib import import_module, util @@ -132,4 +168,4 @@ def run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data cleaning_function = getattr(cleaning_module, provider_key.lower() + "_cleaning") sensor_features = cleaning_function(sensor_data_files, provider) - return sensor_features + return sensor_features \ No newline at end of file diff --git a/tests/scripts/missing_vals.py b/tests/scripts/missing_vals.py new file mode 100644 index 00000000..acbae0bb --- /dev/null +++ b/tests/scripts/missing_vals.py @@ -0,0 +1,39 @@ +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + + +participant = "p031" +all_sensors = ["eda", "bvp", "ibi", "temp", "acc"] + +for sensor in all_sensors: + + if sensor == "eda": + path = f"/rapids/data/interim/{participant}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv" + elif sensor == "bvp": + path = f"/rapids/data/interim/{participant}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv" + elif sensor == "ibi": + path = f"/rapids/data/interim/{participant}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv" + elif sensor == "acc": + path = f"/rapids/data/interim/{participant}/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv" + elif sensor == "temp": + path = f"/rapids/data/interim/{participant}/empatica_temperature_features/empatica_temperature_python_cr_windows.csv" + else: + path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv" # all features all participants + + + df = pd.read_csv(path) + print(df) + is_NaN = df.isnull() + row_has_NaN = is_NaN.any(axis=1) + rows_with_NaN = df[row_has_NaN] + + print("All rows:", len(df.index)) + print("\nCount NaN vals:", rows_with_NaN.size) + print("\nDf mean:") + print(df.mean()) + + sns.heatmap(df.isna(), cbar=False) + plt.savefig(f'{sensor}_{participant}_windows_NaN.png', bbox_inches='tight') + + diff --git a/tests/scripts/zero_vals.py b/tests/scripts/zero_vals.py new file mode 100644 index 00000000..0b02a07c --- /dev/null +++ b/tests/scripts/zero_vals.py @@ -0,0 +1,48 @@ +import pandas as pd +import seaborn as sns +import 
matplotlib.pyplot as plt
+from itertools import compress
+
+
+participant = "p031"
+sensor = "eda"
+
+if sensor == "eda":
+    path = f"/rapids/data/interim/{participant}/empatica_electrodermal_activity_features/empatica_electrodermal_activity_python_cr_windows.csv"
+elif sensor == "bvp":
+    path = f"/rapids/data/interim/{participant}/empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_python_cr_windows.csv"
+elif sensor == "ibi":
+    path = f"/rapids/data/interim/{participant}/empatica_inter_beat_interval_features/empatica_inter_beat_interval_python_cr_windows.csv"
+elif sensor == "acc":
+    path = f"/rapids/data/interim/{participant}/empatica_accelerometer_features/empatica_accelerometer_python_cr_windows.csv"
+elif sensor == "temp":
+    path = f"/rapids/data/interim/{participant}/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"
+else:
+    path = "/rapids/data/processed/features/all_participants/all_sensor_features.csv"  # all features, all participants
+
+
+df = pd.read_csv(path)
+df_num_peaks_zero = df[df["empatica_electrodermal_activity_cr_numPeaks"] == 0]
+columns_num_peaks_zero = df_num_peaks_zero.columns[df_num_peaks_zero.isna().any()].tolist()
+
+df_num_peaks_non_zero = df[df["empatica_electrodermal_activity_cr_numPeaks"] != 0]
+df_num_peaks_non_zero = df_num_peaks_non_zero[columns_num_peaks_zero]
+
+pd.set_option('display.max_columns', None)
+
+df_q = pd.DataFrame()
+for col in df_num_peaks_non_zero:
+    df_q[col] = pd.to_numeric(pd.cut(df_num_peaks_non_zero[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
+
+sns.heatmap(df_q)
+plt.savefig(f'eda_{participant}_window_non_zero_peak_other_vals.png', bbox_inches='tight')
+plt.close()
+
+# Filter columns that do not contain 0
+non_zero_cols = list(compress(columns_num_peaks_zero, df_num_peaks_non_zero.all().tolist()))
+zero_cols = list(set(columns_num_peaks_zero) - set(non_zero_cols))
+
+print(non_zero_cols, "\n")
+print(zero_cols)