Update wildcard constraints, schema and entry scripts

data_cleaning
Meng Li 2021-10-22 15:00:01 -04:00
parent 512355ca01
commit 8250416a7f
4 changed files with 74 additions and 58 deletions

View File

@ -963,7 +963,7 @@ rule clean_sensor_features_for_individual_participants:
input: input:
sensor_data = rules.merge_sensor_features_for_individual_participants.output sensor_data = rules.merge_sensor_features_for_individual_participants.output
wildcard_constraints: wildcard_constraints:
pid = config["PIDS"] pid = "("+"|".join(config["PIDS"])+")"
params: params:
provider = lambda wildcards: config["ALL_CLEANING_INDIVIDUAL"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["ALL_CLEANING_INDIVIDUAL"]["PROVIDERS"][wildcards.provider_key.upper()],
provider_key = "{provider_key}", provider_key = "{provider_key}",

View File

@ -9,14 +9,14 @@ provider <- snakemake@params["provider"][["provider"]]
provider_key <- snakemake@params["provider_key"] provider_key <- snakemake@params["provider_key"]
sensor_key <- snakemake@params["sensor_key"] sensor_key <- snakemake@params["sensor_key"]
if("time_segments_labels" %in% names(sensor_data_files)){ if(sensor_key == "all_cleaning_individual" | sensor_key == "all_cleaning_overall"){
# Data cleaning
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
}else{
# Extract sensor features # Extract sensor features
sensor_data_files$time_segments_labels <- NULL sensor_data_files$time_segments_labels <- NULL
time_segments_file <- snakemake@input[["time_segments_labels"]] time_segments_file <- snakemake@input[["time_segments_labels"]]
sensor_features <- fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file) sensor_features <- fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file)
}else{
# Data cleaning
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
} }
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE) write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)

View File

@ -7,13 +7,13 @@ provider = snakemake.params["provider"]
provider_key = snakemake.params["provider_key"] provider_key = snakemake.params["provider_key"]
sensor_key = snakemake.params["sensor_key"] sensor_key = snakemake.params["sensor_key"]
if "time_segments_labels" in sensor_data_files.keys(): if sensor_key == "all_cleaning_individual" or sensor_key == "all_cleaning_overall":
# Data cleaning
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
else:
# Extract sensor features # Extract sensor features
del sensor_data_files["time_segments_labels"] del sensor_data_files["time_segments_labels"]
time_segments_file = snakemake.input["time_segments_labels"] time_segments_file = snakemake.input["time_segments_labels"]
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file) sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, time_segments_file)
else:
# Data cleaning
sensor_features = run_provider_cleaning_script(provider, provider_key, sensor_key, sensor_data_files)
sensor_features.to_csv(snakemake.output[0], index=False) sensor_features.to_csv(snakemake.output[0], index=False)

View File

@ -40,7 +40,19 @@ required:
- ALL_CLEANING_OVERALL - ALL_CLEANING_OVERALL
definitions: definitions:
PROVIDER: FEATURES_PROVIDER:
type: object
required: [COMPUTE, SRC_SCRIPT, FEATURES]
properties:
COMPUTE:
type: boolean
FEATURES:
type: [array, object]
SRC_SCRIPT:
type: string
pattern: "^.*\\.(py|R)$"
CLEANING_PROVIDER:
type: object type: object
required: [COMPUTE, SRC_SCRIPT] required: [COMPUTE, SRC_SCRIPT]
properties: properties:
@ -226,7 +238,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
type: array type: array
@ -236,7 +248,7 @@ properties:
enum: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"] enum: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
PANDA: PANDA:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
VALID_SENSED_MINUTES: VALID_SENSED_MINUTES:
type: boolean type: boolean
@ -257,7 +269,7 @@ properties:
type: string type: string
enum: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"] enum: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_ACTIVITY_RECOGNITION: PHONE_ACTIVITY_RECOGNITION:
type: object type: object
@ -280,7 +292,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -310,7 +322,7 @@ properties:
type: string type: string
enum: ["in_vehicle"] enum: ["in_vehicle"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_APPLICATIONS_CRASHES: PHONE_APPLICATIONS_CRASHES:
type: object type: object
@ -323,7 +335,7 @@ properties:
- $ref: "#/definitions/APPLICATION_CATEGORIES_FEATURE" - $ref: "#/definitions/APPLICATION_CATEGORIES_FEATURE"
PROVIDERS: PROVIDERS:
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_APPLICATIONS_FOREGROUND: PHONE_APPLICATIONS_FOREGROUND:
type: object type: object
@ -339,7 +351,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
SINGLE_CATEGORIES: SINGLE_CATEGORIES:
type: array type: array
@ -410,7 +422,7 @@ properties:
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_BATTERY: PHONE_BATTERY:
type: object type: object
@ -427,7 +439,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -435,7 +447,7 @@ properties:
type: string type: string
enum: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"] enum: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_BLUETOOTH: PHONE_BLUETOOTH:
type: object type: object
@ -448,7 +460,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -457,7 +469,7 @@ properties:
enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
DORYAB: DORYAB:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
type: object type: object
@ -473,7 +485,7 @@ properties:
allOf: allOf:
- $ref: "#/definitions/DORYAB_BLUETOOTH_FEATURE" - $ref: "#/definitions/DORYAB_BLUETOOTH_FEATURE"
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_CALLS: PHONE_CALLS:
@ -487,7 +499,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES_TYPE: FEATURES_TYPE:
type: string type: string
@ -523,7 +535,7 @@ properties:
string string
enum: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact] enum: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_CONVERSATION: PHONE_CONVERSATION:
type: object type: object
@ -542,7 +554,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
items: items:
@ -562,7 +574,7 @@ properties:
minimum: 1 minimum: 1
maximum: 1440 maximum: 1440
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
type: object type: object
@ -578,7 +590,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -590,7 +602,7 @@ properties:
minimum: 0 minimum: 0
maximum: 1 maximum: 1
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_KEYBOARD: PHONE_KEYBOARD:
type: object type: object
@ -601,7 +613,7 @@ properties:
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_LIGHT: PHONE_LIGHT:
type: object type: object
@ -614,7 +626,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -622,7 +634,7 @@ properties:
type: string type: string
enum: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] enum: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_LOCATIONS: PHONE_LOCATIONS:
type: object type: object
@ -647,7 +659,7 @@ properties:
properties: properties:
DORYAB: DORYAB:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
type: array type: array
@ -687,7 +699,7 @@ properties:
BARNETT: BARNETT:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
type: array type: array
@ -701,7 +713,7 @@ properties:
MINUTES_DATA_USED: MINUTES_DATA_USED:
type: boolean type: boolean
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_LOG: PHONE_LOG:
type: object type: object
@ -718,7 +730,7 @@ properties:
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_MESSAGES: PHONE_MESSAGES:
type: object type: object
@ -731,7 +743,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
MESSAGES_TYPES: MESSAGES_TYPES:
type: array type: array
@ -768,7 +780,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
REFERENCE_HOUR_FIRST_USE: REFERENCE_HOUR_FIRST_USE:
type: integer type: integer
@ -803,7 +815,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -811,7 +823,7 @@ properties:
type: string type: string
enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
PHONE_WIFI_VISIBLE: PHONE_WIFI_VISIBLE:
type: object type: object
@ -824,7 +836,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -832,7 +844,7 @@ properties:
type: string type: string
enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"] enum: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
FITBIT_DATA_STREAMS: FITBIT_DATA_STREAMS:
type: object type: object
@ -892,7 +904,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -916,7 +928,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -935,7 +947,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -943,7 +955,7 @@ properties:
type: string type: string
enum: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"] enum: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
FITBIT_SLEEP_SUMMARY: FITBIT_SLEEP_SUMMARY:
type: object type: object
@ -956,7 +968,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -970,7 +982,7 @@ properties:
type: string type: string
enum: ["main", "nap", "all"] enum: ["main", "nap", "all"]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
FITBIT_SLEEP_INTRADAY: FITBIT_SLEEP_INTRADAY:
type: object type: object
@ -983,7 +995,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
type: object type: object
@ -1039,7 +1051,7 @@ properties:
enum: [main, nap, all] enum: [main, nap, all]
PRICE: PRICE:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -1081,7 +1093,7 @@ properties:
minimum: 0 minimum: 0
maximum: 1439 maximum: 1439
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
FITBIT_STEPS_SUMMARY: FITBIT_STEPS_SUMMARY:
type: object type: object
@ -1094,7 +1106,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -1132,7 +1144,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
required: [STEPS, SEDENTARY_BOUT, ACTIVE_BOUT] required: [STEPS, SEDENTARY_BOUT, ACTIVE_BOUT]
@ -1173,7 +1185,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/FEATURES_PROVIDER"
- properties: - properties:
FEATURES: FEATURES:
uniqueItems: True uniqueItems: True
@ -1200,7 +1212,7 @@ properties:
type: string type: string
enum: [MIDNIGHT, START_OF_THE_SEGMENT] enum: [MIDNIGHT, START_OF_THE_SEGMENT]
additionalProperties: additionalProperties:
$ref: "#/definitions/PROVIDER" $ref: "#/definitions/FEATURES_PROVIDER"
HISTOGRAM_PHONE_DATA_YIELD: HISTOGRAM_PHONE_DATA_YIELD:
type: object type: object
@ -1266,7 +1278,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/CLEANING_PROVIDER"
- properties: - properties:
IMPUTE_SELECTED_EVENT_FEATURES: IMPUTE_SELECTED_EVENT_FEATURES:
type: object type: object
@ -1306,6 +1318,8 @@ properties:
type: number type: number
minimum: 0 minimum: 0
maximum: 1 maximum: 1
additionalProperties:
$ref: "#/definitions/CLEANING_PROVIDER"
ALL_CLEANING_OVERALL: ALL_CLEANING_OVERALL:
type: object type: object
@ -1316,7 +1330,7 @@ properties:
properties: properties:
RAPIDS: RAPIDS:
allOf: allOf:
- $ref: "#/definitions/PROVIDER" - $ref: "#/definitions/CLEANING_PROVIDER"
- properties: - properties:
IMPUTE_SELECTED_EVENT_FEATURES: IMPUTE_SELECTED_EVENT_FEATURES:
type: object type: object
@ -1356,3 +1370,5 @@ properties:
type: number type: number
minimum: 0 minimum: 0
maximum: 1 maximum: 1
additionalProperties:
$ref: "#/definitions/CLEANING_PROVIDER"