From 84a8a9308203489550103e9ccc3d5141f1f6a03a Mon Sep 17 00:00:00 2001 From: JulioV Date: Mon, 1 Feb 2021 13:01:43 -0500 Subject: [PATCH 1/5] Initial support for a config schema --- Snakefile | 2 + tools/check_schema.py | 17 ++++ tools/config.schema.yaml | 168 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 tools/check_schema.py create mode 100644 tools/config.schema.yaml diff --git a/Snakefile b/Snakefile index 122b1421..68fc10e7 100644 --- a/Snakefile +++ b/Snakefile @@ -1,4 +1,6 @@ +from snakemake.utils import validate configfile: "config.yaml" +validate(config, "tools/config.schema.yaml") include: "rules/common.smk" include: "rules/renv.smk" include: "rules/preprocessing.smk" diff --git a/tools/check_schema.py b/tools/check_schema.py new file mode 100644 index 00000000..ab21a283 --- /dev/null +++ b/tools/check_schema.py @@ -0,0 +1,17 @@ +# Adapted from https://bitbucket.org/snakemake/snakemake/pull-requests/291/schema-based-validation/diff +from jsonschema import Draft7Validator +import yaml +import collections +class OrderedLoader(yaml.Loader): + pass + +def construct_mapping(loader, node): + loader.flatten_mapping(node) + return collections.OrderedDict(loader.construct_pairs(node)) + +OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) +with open("tools/config.schema.yaml") as f: + data = yaml.load(f, OrderedLoader) + +Draft7Validator.check_schema(data) +print("Schema is OK") \ No newline at end of file diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml new file mode 100644 index 00000000..fb931d63 --- /dev/null +++ b/tools/config.schema.yaml @@ -0,0 +1,168 @@ +$schema: "http://json-schema.org/draft-07/schema#" +description: RAPIDS configuration schema +required: + - DATABASE_GROUP + - TIMEZONE + - CREATE_PARTICIPANT_FILES + - PHONE_CALLS + - PHONE_KEYBOARD + - PHONE_LOCATIONS + +definitions: + PROVIDER: + type: object + required: [COMPUTE, SRC_FOLDER, SRC_LANGUAGE, FEATURES] + properties: + COMPUTE: + type: boolean + FEATURES: + type: [array, object] + SRC_FOLDER: + type: string + SRC_LANGUAGE: + type: string + enum: [python, r] + +properties: + DATABASE_GROUP: + type: string + + TIMEZONE: + type: string + + CREATE_PARTICIPANT_FILES: + type: object + required: [SOURCE] + properties: + SOURCE: + type: object + required: [TYPE] + properties: + TYPE: + type: string + enum: [AWARE_DEVICE_TABLE, CSV_FILE] + DATABASE_GROUP: + type: string + CSV_FILE_PATH: + type: string + pattern: "^.*\\.csv$" + TIMEZONE: + type: string + PHONE_SECTION: + type: object + properties: + ADD: + type: boolean + DEVICE_ID_COLUMN: + type: string + IGNORED_DEVICE_IDS: + type: array + items: + type: string + FITBIT_SECTION: + properties: + ADD: + type: boolean + DEVICE_ID_COLUMN: + type: string + IGNORED_DEVICE_IDS: + type: array + items: + type: string + + PHONE_CALLS: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + CALL_TYPES: + type: array + items: + type: string + enum: [missed, incoming, outgoing] + FEATURES: + type: object + required: [missed, incoming, outgoing] + properties: + missed: + type: array + uniqueItems: True + items: + type: + string + enum: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact] + incoming: + type: array + uniqueItems: True + items: + type: + string + enum: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact] + outgoing: + type: array + uniqueItems: True + items: + type: + string + enum: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_KEYBOARD: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_LOCATIONS: + type: object + required: [TABLE, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, PROVIDERS] + properties: + TABLE: + type: string + LOCATIONS_TO_USE: + type: string + enum: [ALL, GPS, ALL_RESAMPLED, FUSED_RESAMPLED] + FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: + type: integer + exclusiveMinimum: 0 + FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: + type: integer + exclusiveMinimum: 0 + PROVIDERS: + type: ["null", object] + properties: + DORYAB: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + type: array + uniqueItems: True + items: + type: string + enum: [locationvariance,loglocationvariance,totaldistance,averagespeed,varspeed,circadianmovement,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,meanlengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy] + ACCURACY_LIMIT: + type: integer + exclusiveMinimum: 0 + additionalProperties: + $ref: "#/definitions/PROVIDER" + + + + + + From 327b015206c5552cdbb7a969e6b6c92c7b7ec466 Mon Sep 17 00:00:00 2001 From: Weiyu Date: Tue, 2 Feb 2021 17:53:49 -0500 Subject: [PATCH 2/5] Add validation for config keys --- config.yaml | 2 +- renv/activate.R | 3 + tools/config.schema.yaml | 792 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 796 insertions(+), 1 deletion(-) diff --git a/config.yaml b/config.yaml index 63277eb2..04626fc9 100644 --- a/config.yaml +++ b/config.yaml @@ -155,7 +155,7 @@ PHONE_BLUETOOTH: SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth SRC_LANGUAGE: "r" DORYAB: - COMPUTE: FALSE + COMPUTE: False FEATURES: ALL: DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"] diff --git a/renv/activate.R b/renv/activate.R index d3b271d8..3459bdd2 100644 --- a/renv/activate.R +++ b/renv/activate.R @@ -18,6 +18,9 @@ local({ if(grepl("Darwin", Sys.info()["sysname"], fixed = TRUE) & grepl("ARM64", Sys.info()["version"], fixed = TRUE)) # M1 Macs Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo")) + # set timezone library + #Sys.setenv("TZDIR" = file.path(R.home(), "share", "zoneinfo")) + # signal that we've consented to use renv options(renv.consent = TRUE) diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index fb931d63..31031e35 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -3,10 +3,38 @@ description: RAPIDS configuration schema required: - DATABASE_GROUP - TIMEZONE + - PIDS - CREATE_PARTICIPANT_FILES + - PHONE_DATA_CONFIGURATION + - PHONE_ACCELEROMETER + - PHONE_ACTIVITY_RECOGNITION + - PHONE_APPLICATIONS_CRASHES + - PHONE_APPLICATIONS_FOREGROUND + - PHONE_APPLICATIONS_NOTIFICATIONS + - PHONE_AWARE_LOG + - PHONE_BATTERY + - PHONE_BLUETOOTH - PHONE_CALLS + - PHONE_CONVERSATION + - PHONE_DATA_YIELD - PHONE_KEYBOARD - PHONE_LOCATIONS + - PHONE_MESSAGES + - PHONE_SCREEN + - PHONE_WIFI_CONNECTED + - PHONE_WIFI_VISIBLE + - FITBIT_DATA_CONFIGURATION + - FITBIT_DATA_YIELD + - FITBIT_HEARTRATE_SUMMARY + - FITBIT_HEARTRATE_INTRADAY + - FITBIT_SLEEP_SUMMARY + - FITBIT_STEPS_SUMMARY + - FITBIT_STEPS_INTRADAY + - HISTOGRAM_PHONE_DATA_YIELD + - HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT + - HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT + - HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT + - HEATMAP_FEATURE_CORRELATION_MATRIX definitions: PROVIDER: @@ -22,7 +50,68 @@ definitions: SRC_LANGUAGE: type: string enum: [python, r] + + DORYAB_BLUETOOTH_FEATURE: + type: object + required: [DEVICES, SCANS_MOST_FREQUENT_DEVICE, SCANS_LEAST_FREQUENT_DEVICE] + properties: + DEVICES: + type: array + uniqueItems: True + items: + type: string + enum: ["countscans", "uniquedevices", "meanscans", "stdscans"] + SCANS_MOST_FREQUENT_DEVICE: + type: array + uniqueItems: True + items: + type: string + enum: ["withinsegments", "acrosssegments", "acrossdataset"] + SCANS_LEASE_FREQUENT_DEVICE: + type: array + uniqueItems: True + items: + type: string + enum: ["withinsegments", "acrosssegments", "acrossdataset"] + APPLICATION_CATEGORIES_FEATURE: + type: object + required: [CATALOGUE_SOURCE, CATALOGUE_FILE, UPDATE_CATALOGUE_FILE, SCRAPE_MISSING_CATEGORIES] + properties: + CATALOGUE_SOURCE: + type: string + enum: ["FILE", "GOOGLE"] + CATALOGUE_FILE: + type: string + pattern: "^.*\\.csv$" + UPDATE_CATALOGUE_FILE: + type: boolean + SCRAPE_MISSING_CATEGORIES: + type: boolean + + DATA_CONFIGURATION: + type: object + required: [SOURCE, TIMEZONE] + properties: + SOURCE: + type: object + required: [TYPE, DATABASE_GROUP, DEVICE_ID_COLUMN] + properties: + TYPE: + type: string + DATABASE_GROUP: + type: string + DEVICE_ID_COLUMN: + type: string + TIMEZONE: + type: object + required: [TYPE, VALUE] + properties: + TYPE: + type: string + VALUE: + type: string + properties: DATABASE_GROUP: type: string @@ -30,6 +119,12 @@ properties: TIMEZONE: type: string + PIDS: + type: array + uniqueItems: True + items: + type: string + CREATE_PARTICIPANT_FILES: type: object required: [SOURCE] @@ -70,6 +165,256 @@ properties: items: type: string + + + + PHONE_DATA_CONFIGURATION: + allOf: + - $ref: "#/definitions/DATA_CONFIGURATION" + + PHONE_ACCELEROMETER: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + type: array + uniqueItems: True + items: + type: string + enum: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] + PANDA: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + VALID_SENSED_MINUTES: + type: boolean + FEATURES: + type: object + required: [exertional_activity_episode, nonexertional_activity_episode] + properties: + exertional_activity_episode: + type: array + uniqueItems: True + items: + type: string + enum: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] + nonexertional_activity_episode: + type: array + uniqueItems: True + items: + type: string + enum: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_ACTIVITY_RECOGNITION: + type: object + required: [TABLE, EPISODE_THRESHOLD_BETWEEN_ROWS, PROVIDERS] + properties: + TABLE: + type: object + required: [ANDROID, IOS] + properties: + ANDROID: + type: string + IOS: + type: string + EPISODE_THRESHOLD_BETWEENROWS: + type: integer + maximum: 5 + minimum: 0 + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"] + ACTIVITY_CLASSES: + type: object + required: [STATIONARY, MOBILE, VEHICLE] + properties: + STATIONARY: + type: array + uniqueItems: True + items: + type: string + enum: ["still", "tilting"] + MOBILE: + type: array + uniqueItems: True + items: + type: string + enum: ["on_foot", "walking", "running", "on_bicycle"] + VEHICLE: + type: array + uniqueItems: True + items: + type: string + enum: ["in_vehicle"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_APPLICATIONS_CRASHES: + type: object + required: [TABLE, APPLICATION_CATEGORIES, PROVIDERS] + properties: + TABLE: + type: string + APPLICATION_CATEGORIES: + allOf: + - $ref: "#/definitions/APPLICATION_CATEGORIES_FEATURE" + PROVIDERS: + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_APPLICATIONS_FOREGROUND: + type: object + required: [TABLE, APPLICATION_CATEGORIES, PROVIDERS] + properties: + TABLE: + type: string + APPLICATION_CATEGORIES: + allOf: + - $ref: "#/definitions/APPLICATION_CATEGORIES_FEATURE" + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + SINGLE_CATEGORIES: + type: array + uniqueItems: True + items: + type: string + MULTIPLE_CATEGORIES: + type: object + additionalProperties: + type: array + uniqueItems: True + items: + type: string + EXCLUDED_CATEGORIES: + type: ["null", array] + uniqueItems: True + items: + type: string + EXCLUDED_APPS: + type: array + uniqueItems: true + items: + type: string + FEATURES: + uniqueItems: True + items: + type: string + enum: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + + PHONE_APPLICATIONS_NOTIFICATIONS: + type: object + required: [TABLE, APPLICATION_CATEGORIES, PROVIDERS] + properties: + TABLE: + type: string + APPLICATION_CATEGORIES: + allOf: + - $ref: "#/definitions/APPLICATION_CATEGORIES_FEATURE" + PROVIDERS: + type: ["null", object] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_AWARE_LOG: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_BATTERY: + type: object + required: [TABLE, PROVIDERS, EPISODE_THRESHOLD_BETWEEN_ROWS] + properties: + TABLE: + type: string + EPISODE_THRESHOLD_BETWEEN_ROWS: + type: integer + maximum: 1440 + minimum: 0 + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_BLUETOOTH: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] + DORYAB: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + type: object + required: [ALL, OWN, OTHERS] + properties: + ALL: + allOf: + - $ref: "#/definitions/DORYAB_BLUETOOTH_FEATURE" + OWN: + allOf: + - $ref: "#/definitions/DORYAB_BLUETOOTH_FEATURE" + OTHERS: + allOf: + - $ref: "#/definitions/DORYAB_BLUETOOTH_FEATURE" + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_CALLS: type: object required: [TABLE, PROVIDERS] @@ -116,6 +461,73 @@ properties: additionalProperties: $ref: "#/definitions/PROVIDER" + PHONE_CONVERSATION: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: object + required: [ANDROID, IOS] + properties: + ANDROID: + type: string + IOS: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + items: + type: string + enum: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration", + "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy", + "noiseavgenergy","noisesdenergy","noiseminenergy","noisemaxenergy","voicesumenergy", + "voiceavgenergy","voicesdenergy","voiceminenergy","voicemaxenergy","silencesensedfraction","noisesensedfraction", + "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction", + "unknownexpectedfraction","countconversation"] + RECORDING_MINUTES: + type: integer + minimum: 1 + maximum: 1440 + PAUSED_MINUTES: + type: integer + minimum: 1 + maximum: 1440 + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_DATA_YIELD: + type: object + required: [SENSORS, PROVIDERS] + properties: + SENSORS: + type: ["null", array] + items: + type: string + enum: ["PHONE_ACCELEROMETER", "PHONE_ACTIVITY_RECOGNITION", "PHONE_APPLICATIONS_FOREGROUND", "PHONE_BATTERY", "PHONE_BLUETOOTH", "PHONE_CALLS", "PHONE_CONVERSATION", "PHONE_LIGHT", "PHONE_LOCATIONS", "PHONE_MESSAGES", "PHONE_SCREEN", "PHONE_WIFI_CONNECTED", "PHONE_WIFI_VISIBLE"] + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: [ratiovalidyieldedminutes, ratiovalidyieldedhours] + MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: + type: number + minimum: 0 + maximum: 1 + additionalProperties: + $ref: "#/definitions/PROVIDER" + PHONE_KEYBOARD: type: object required: [TABLE, PROVIDERS] @@ -127,6 +539,27 @@ properties: additionalProperties: $ref: "#/definitions/PROVIDER" + PHONE_LIGHT: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + PHONE_LOCATIONS: type: object required: [TABLE, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, PROVIDERS] @@ -158,11 +591,370 @@ properties: ACCURACY_LIMIT: type: integer exclusiveMinimum: 0 + DBSCAN_EPS: + type: integer + exclusiveMinimum: 0 + DBSCAN_MINSAMPLES: + type: integer + exclusiveMinimum: 0 + THRESHOLD_STATIC: + type: integer + exclusiveMinimum: 0 + MAXIMUM_ROW_GAP: + type: integer + exclusiveMinimum: 0 + MAXIMUM_ROW_DURATION: + type: integer + exclusiveMinimum: 0 + MINUTES_DATA_USED: + type: boolean + CLUSTER_ON: + type: string + enum: ["PARTICIPANT_DATASET", "TIME_SEGMENT"] + CLUSTERING_ALGORITHM: + type: string + enum: ["DBSCAN", "OPTICS"] + + BARNETT: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + type: array + uniqueItems: True + items: + type: string + enum: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] + ACCURACY_LIMIT: + type: integer + exclusiveMinimum: 0 + TIMEZONE: + type: string + MINUTES_DATA_USED: + type: boolean additionalProperties: $ref: "#/definitions/PROVIDER" + + PHONE_MESSAGES: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + MESSAGES_TYPES: + type: array + uniqueItems: True + items: + type: string + enum: ["received", "sent"] + FEATURES: + type: object + required: [received, sent] + properties: + received: + type: array + uniqueItems: True + items: + type: string + enum: ["count", "distinctcontacts", "timefirstmessage", "timelastmessage", "countmostfrequentcontact"] + + sent: + type: array + uniqueItems: True + items: + type: string + enum: ["count", "distinctcontacts", "timefirstmessage", "timelastmessage", "countmostfrequentcontact"] + + PHONE_SCREEN: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + REFERENCE_HOUR_FIRST_USE: + type: integer + minimum: 0 + maximum: 23 + IGNORE_EPISODES_SHORTER_THAN: + type: integer + minimum: 0 + IGNORE_EPISODES_LONGER_THAN: + type: integer + minimum: 0 + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] + EPISODE_TYPES: + type: array + uniqueItems: True + items: + type: string + enum: ["unlock"] + + PHONE_WIFI_CONNECTED: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + PHONE_WIFI_VISIBLE: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + additionalProperties: + $ref: "#/definitions/PROVIDER" + + FITBIT_DATA_CONFIGURATION: + allOf: + - $ref: "#/definitions/DATA_CONFIGURATION" + - properties: + SOURCE: + properties: + COLUMN_FORMAT: + type: string + enum: ["JSON", "PLAIN_TEXT"] + + FITBIT_DATA_YIELD: + type: object + required: [SENSOR, PROVIDERS] + properties: + SENSOR: + type: string + enum: ["FITBIT_HEARTRATE_INTRADAY"] + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["ratiovalidyieldedminutes", "ratiovalidyieldedhours"] + MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: + type: number + minimum: 0 + maximum: 1 + FITBIT_HEARTRATE_SUMMARY: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["maxrestinghr", "minrestinghr", "avgrestinghr", "medianrestinghr", "moderestinghr", "stdrestinghr", "diffmaxmoderestinghr", "diffminmoderestinghr", "entropyrestinghr"] + FITBIT_HEARTRATE_INTRADAY: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + FITBIT_SLEEP_SUMMARY: + type: object + required: [TABLE, SLEEP_EPISODE_TIMESTAMP, PROVIDERS] + properties: + TABLE: + type: string + SLEEP_EPISODE_TIMESTAMP: + type: string + enum: ["start", "end"] + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countepisode", "avgefficiency", "sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgdurationafterwakeup", "avgdurationasleep", "avgdurationawake", "avgdurationtofallasleep", "avgdurationinbed"] + SLEEP_TYPES: + type: array + uniqueItems: True + items: + type: string + enum: ["main", "nap", "all"] + additionalProperties: + $ref: "#/definitions/PROVIDER" + + FITBIT_STEPS_SUMMARY: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["maxsumsteps", "minsumsteps", "avgsumsteps", "mediansumsteps", "stdsumsteps"] + + FITBIT_STEPS_INTRADAY: + type: object + required: [TABLE, PROVIDERS] + properties: + TABLE: + type: string + PROVIDERS: + type: ["null", object] + properties: + RAPIDS: + allOf: + - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + required: [STEPS, SEDENTARY_BOUT, ACTIVE_BOUT] + properties: + STEPS: + type: array + uniqueItems: True + items: + type: string + enum: ["sum", "max", "min", "avg", "std"] + SEDENTARY_BOUT: + type: array + uniqueItems: True + items: + type: string + enum: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"] + ACTIVE_BOUT: + type: array + uniqueItems: True + items: + type: string + enum: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"] + THRESHOLD_ACTIVE_BOUT: + type: integer + minimum: 0 + INCLUDE_ZERO_STEP_ROWS: + type: boolean + HISTOGRAM_PHONE_DATA_YIELD: + type: object + required: [PLOT] + properties: + PLOT: + type: boolean + + HEATMAP_PHONE_DATA_YIELD_PER_PARTICIPANT_PER_TIME_SEGMENT: + type: object + required: [PLOT] + properties: + PLOT: + type: boolean + + HEATMAP_SENSORS_PER_MINUTE_PER_TIME_SEGMENT: + type: object + required: [PLOT] + properties: + PLOT: + type: boolean + + HEATMAP_SENSOR_ROW_COUNT_PER_TIME_SEGMENT: + type: object + required: [PLOT, SENSORS] + properties: + PLOT: + type: boolean + SENSORS: + type: array + uniqueItems: True + items: + type: string + enum: ["PHONE_ACCELEROMETER", "PHONE_ACTIVITY_RECOGNITION", "PHONE_APPLICATIONS_FOREGROUND", "PHONE_BATTERY", "PHONE_BLUETOOTH", "PHONE_CALLS", "PHONE_CONVERSATION", "PHONE_LIGHT", "PHONE_LOCATIONS", "PHONE_MESSAGES", "PHONE_SCREEN", "PHONE_WIFI_CONNECTED", "PHONE_WIFI_VISIBLE"] + HEATMAP_FEATURE_CORRELATION_MATRIX: + type: object + required: [PLOT, MIN_ROWS_RATIO, CORR_THRESHOLD, CORR_METHOD] + properties: + PLOT: + type: boolean + MIN_ROWS_RATIO: + type: number + minimum: 0 + maximum: 1 + CORR_THRESHOLD: + type: number + minimum: 0 + maximum: 1 + CORR_METHOD: + type: string + enum: ["pearson", "kendall", "spearman"] From e2d45460f7739a834eecd0f47d334032b17dbec5 Mon Sep 17 00:00:00 2001 From: JulioV Date: Fri, 12 Feb 2021 13:19:00 -0500 Subject: [PATCH 3/5] Apply suggestions from code review --- tools/config.schema.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 31031e35..812e2068 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -127,7 +127,7 @@ properties: CREATE_PARTICIPANT_FILES: type: object - required: [SOURCE] + required: [SOURCE, PHONE_SECTION, FITBIT_SECTION] properties: SOURCE: type: object @@ -228,9 +228,9 @@ properties: type: string IOS: type: string - EPISODE_THRESHOLD_BETWEENROWS: + EPISODE_THRESHOLD_BETWEEN_ROWS: type: integer - maximum: 5 + maximum: 1440 minimum: 0 PROVIDERS: type: ["null", object] From 6bad9066f861fec4718b11fb95f439bc4be8c4a4 Mon Sep 17 00:00:00 2001 From: Weiyu Date: Fri, 12 Feb 2021 18:11:43 -0500 Subject: [PATCH 4/5] Added time_segment feature --- tools/config.schema.yaml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 812e2068..bff5963d 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -5,6 +5,7 @@ required: - TIMEZONE - PIDS - CREATE_PARTICIPANT_FILES + - TIME_SEGMENTS - PHONE_DATA_CONFIGURATION - PHONE_ACCELEROMETER - PHONE_ACTIVITY_RECOGNITION @@ -165,8 +166,18 @@ properties: items: type: string - - + TIME_SEGMENTS: + type: object + required: [TYPE, FILE, INCLUDE_PAST_PERIODIC_SEGMENTS] + properties: + TYPE: + type: string + enum: ["FREQUENCY", "PERIODIC", "EVENT"] + FILE: + type: string + pattern: "^.*\\.csv$" + INCLUDE_PAST_PERIODIC_SEGMENTS: + type: boolean PHONE_DATA_CONFIGURATION: allOf: From 5abca8bb0f25c93b6edc6dc60edcac0bd3be84d6 Mon Sep 17 00:00:00 2001 From: JulioV Date: Sun, 21 Feb 2021 19:34:42 -0500 Subject: [PATCH 5/5] Missing config validations --- config.yaml | 6 +++--- tools/config.schema.yaml | 25 +++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index 04626fc9..67f5344e 100644 --- a/config.yaml +++ b/config.yaml @@ -75,7 +75,7 @@ PHONE_ACTIVITY_RECOGNITION: TABLE: ANDROID: plugin_google_activity_recognition IOS: plugin_ios_activity_recognition - EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode. + EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode. PROVIDERS: RAPIDS: COMPUTE: False @@ -212,7 +212,7 @@ PHONE_DATA_YIELD: RAPIDS: COMPUTE: False FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] - MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least + MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid. SRC_LANGUAGE: "r" SRC_FOLDER: "rapids" # inside src/features/phone_data_yield @@ -335,7 +335,7 @@ FITBIT_DATA_YIELD: RAPIDS: COMPUTE: False FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours] - MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1 representing the number of minutes with at least + MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid. SRC_LANGUAGE: "r" SRC_FOLDER: "rapids" # inside src/features/fitbit_data_yield diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index bff5963d..20cebb9b 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -68,7 +68,7 @@ definitions: items: type: string enum: ["withinsegments", "acrosssegments", "acrossdataset"] - SCANS_LEASE_FREQUENT_DEVICE: + SCANS_LEAST_FREQUENT_DEVICE: type: array uniqueItems: True items: @@ -182,6 +182,13 @@ properties: PHONE_DATA_CONFIGURATION: allOf: - $ref: "#/definitions/DATA_CONFIGURATION" + - properties: + SOURCE: + type: object + properties: + TYPE: + type: string + enum: [DATABASE] PHONE_ACCELEROMETER: type: object @@ -321,6 +328,11 @@ properties: uniqueItems: True items: type: string + SINGLE_APPS: + type: array + uniqueItems: True + items: + type: string EXCLUDED_CATEGORIES: type: ["null", array] uniqueItems: True @@ -519,7 +531,7 @@ properties: type: ["null", array] items: type: string - enum: ["PHONE_ACCELEROMETER", "PHONE_ACTIVITY_RECOGNITION", "PHONE_APPLICATIONS_FOREGROUND", "PHONE_BATTERY", "PHONE_BLUETOOTH", "PHONE_CALLS", "PHONE_CONVERSATION", "PHONE_LIGHT", "PHONE_LOCATIONS", "PHONE_MESSAGES", "PHONE_SCREEN", "PHONE_WIFI_CONNECTED", "PHONE_WIFI_VISIBLE"] + enum: ["PHONE_ACCELEROMETER", "PHONE_ACTIVITY_RECOGNITION", "PHONE_APPLICATIONS_CRASHES", "PHONE_APPLICATIONS_FOREGROUND", "PHONE_APPLICATIONS_NOTIFICATIONS", "PHONE_AWARE_LOG", "PHONE_BATTERY", "PHONE_BLUETOOTH", "PHONE_CALLS", "PHONE_CONVERSATION", "PHONE_KEYBOARD", "PHONE_LIGHT", "PHONE_LOCATIONS", "PHONE_MESSAGES", "PHONE_SCREEN", "PHONE_WIFI_CONNECTED", "PHONE_WIFI_VISIBLE"] PROVIDERS: type: ["null", object] properties: @@ -751,6 +763,12 @@ properties: RAPIDS: allOf: - $ref: "#/definitions/PROVIDER" + - properties: + FEATURES: + uniqueItems: True + items: + type: string + enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] additionalProperties: $ref: "#/definitions/PROVIDER" @@ -760,6 +778,9 @@ properties: - properties: SOURCE: properties: + TYPE: + type: string + enum: [DATABASE, FILES] COLUMN_FORMAT: type: string enum: ["JSON", "PLAIN_TEXT"]