Initial support for a config schema

feature/plugin_sentimental
JulioV 2021-02-01 13:01:43 -05:00
parent 12fffb9c63
commit 84a8a93082
3 changed files with 187 additions and 0 deletions

View File

@ -1,4 +1,6 @@
# Schema-validation helper provided by Snakemake.
from snakemake.utils import validate
# Load the pipeline settings; they are referenced below as `config`.
configfile: "config.yaml"
# Check the loaded settings against the schema file before any rule file is included.
validate(config, "tools/config.schema.yaml")
# Rule files, included after the config has been validated.
include: "rules/common.smk"
include: "rules/renv.smk"
include: "rules/preprocessing.smk"

View File

@ -0,0 +1,17 @@
# Adapted from https://bitbucket.org/snakemake/snakemake/pull-requests/291/schema-based-validation/diff
from jsonschema import Draft7Validator
import yaml
import collections
class OrderedLoader(yaml.SafeLoader):
    """YAML loader dedicated to reading the config schema file.

    Derives from ``yaml.SafeLoader`` rather than the full ``yaml.Loader``:
    the schema consists of plain mappings, sequences and scalars only, so the
    Python-object tags that the full loader is able to construct are never
    needed here and are better left disabled.

    A dedicated subclass is used so that constructors registered on it do not
    alter the behaviour of PyYAML's stock loader classes.
    """
def construct_mapping(loader, node):
    """Build a YAML mapping node as an ``OrderedDict``.

    Args:
        loader: The loader instance currently constructing the document.
        node: The mapping node to convert.

    Returns:
        collections.OrderedDict: The node's key/value pairs in the order
        returned by ``loader.construct_pairs``.
    """
    # flatten_mapping is called first and for its effect on `node` only;
    # in PyYAML this is the step that expands "<<" merge keys.
    loader.flatten_mapping(node)
    key_value_pairs = loader.construct_pairs(node)
    return collections.OrderedDict(key_value_pairs)
# Route every plain YAML mapping through construct_mapping.
OrderedLoader.add_constructor(
    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
    construct_mapping,
)

# Parse the schema file with the customised loader.
with open("tools/config.schema.yaml") as schema_file:
    schema = yaml.load(schema_file, Loader=OrderedLoader)

# Ask jsonschema whether the parsed document is itself a well-formed
# draft-07 schema; the success message is printed only if this call returns.
Draft7Validator.check_schema(schema)
print("Schema is OK")

View File

@ -0,0 +1,168 @@
# JSON Schema dialect this document is written against.
$schema: 'http://json-schema.org/draft-07/schema#'
description: 'RAPIDS configuration schema'

# Top-level config keys that must be present.
required:
  - DATABASE_GROUP
  - TIMEZONE
  - CREATE_PARTICIPANT_FILES
  - PHONE_CALLS
  - PHONE_KEYBOARD
  - PHONE_LOCATIONS
# Reusable sub-schemas, referenced from other places in this schema via $ref.
definitions:
  # Keys that every feature-provider entry must carry.
  PROVIDER:
    type: object
    required: [COMPUTE, SRC_FOLDER, SRC_LANGUAGE, FEATURES]
    properties:
      COMPUTE:
        type: boolean
      # Either an array or an object is accepted at this generic level.
      FEATURES:
        type: [array, object]
      SRC_FOLDER:
        type: string
      SRC_LANGUAGE:
        type: string
        enum: [python, r]
# Validation rules for the individual top-level config keys.
properties:
  DATABASE_GROUP:
    type: string

  TIMEZONE:
    type: string

  CREATE_PARTICIPANT_FILES:
    type: object
    required: [SOURCE]
    properties:
      SOURCE:
        type: object
        required: [TYPE]
        properties:
          TYPE:
            type: string
            enum: [AWARE_DEVICE_TABLE, CSV_FILE]
          DATABASE_GROUP:
            type: string
          CSV_FILE_PATH:
            type: string
            # Accepts any string that ends in ".csv".
            pattern: "^.*\\.csv$"
          TIMEZONE:
            type: string
      PHONE_SECTION:
        type: object
        properties:
          ADD:
            type: boolean
          DEVICE_ID_COLUMN:
            type: string
          IGNORED_DEVICE_IDS:
            type: array
            items:
              type: string
      FITBIT_SECTION:
        # Declared as an object, like PHONE_SECTION; without this the
        # property rules below are skipped for any non-object value.
        type: object
        properties:
          ADD:
            type: boolean
          DEVICE_ID_COLUMN:
            type: string
          IGNORED_DEVICE_IDS:
            type: array
            items:
              type: string

  PHONE_CALLS:
    type: object
    required: [TABLE, PROVIDERS]
    properties:
      TABLE:
        type: string
      # PROVIDERS may be null or an object keyed by provider name.
      PROVIDERS:
        type: ["null", object]
        properties:
          # RAPIDS must satisfy the generic PROVIDER definition and the
          # call-specific rules listed after it.
          RAPIDS:
            allOf:
              - $ref: "#/definitions/PROVIDER"
              - properties:
                  CALL_TYPES:
                    type: array
                    items:
                      type: string
                      enum: [missed, incoming, outgoing]
                  # One feature list per call type; all three are required.
                  FEATURES:
                    type: object
                    required: [missed, incoming, outgoing]
                    properties:
                      missed:
                        type: array
                        uniqueItems: true
                        items:
                          type: string
                          enum:
                            - count
                            - distinctcontacts
                            - timefirstcall
                            - timelastcall
                            - countmostfrequentcontact
                      incoming:
                        type: array
                        uniqueItems: true
                        items:
                          type: string
                          enum:
                            - count
                            - distinctcontacts
                            - meanduration
                            - sumduration
                            - minduration
                            - maxduration
                            - stdduration
                            - modeduration
                            - entropyduration
                            - timefirstcall
                            - timelastcall
                            - countmostfrequentcontact
                      outgoing:
                        type: array
                        uniqueItems: true
                        items:
                          type: string
                          enum:
                            - count
                            - distinctcontacts
                            - meanduration
                            - sumduration
                            - minduration
                            - maxduration
                            - stdduration
                            - modeduration
                            - entropyduration
                            - timefirstcall
                            - timelastcall
                            - countmostfrequentcontact
        # Any provider key other than RAPIDS is checked against the generic
        # PROVIDER definition.
        additionalProperties:
          $ref: "#/definitions/PROVIDER"

  PHONE_KEYBOARD:
    type: object
    required: [TABLE, PROVIDERS]
    properties:
      TABLE:
        type: string
      # PROVIDERS may be null or an object keyed by provider name.
      PROVIDERS:
        type: ["null", object]
        # Every provider entry is checked against the generic PROVIDER
        # definition.
        additionalProperties:
          $ref: "#/definitions/PROVIDER"

  PHONE_LOCATIONS:
    type: object
    required:
      - TABLE
      - LOCATIONS_TO_USE
      - FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD
      - PROVIDERS
    properties:
      TABLE:
        type: string
      LOCATIONS_TO_USE:
        type: string
        enum: [ALL, GPS, ALL_RESAMPLED, FUSED_RESAMPLED]
      # exclusiveMinimum: 0 means the value must be strictly greater than 0.
      FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD:
        type: integer
        exclusiveMinimum: 0
      FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION:
        type: integer
        exclusiveMinimum: 0
      # PROVIDERS may be null or an object keyed by provider name.
      PROVIDERS:
        type: ["null", object]
        properties:
          # DORYAB must satisfy the generic PROVIDER definition and the
          # location-specific rules listed after it.
          DORYAB:
            allOf:
              - $ref: "#/definitions/PROVIDER"
              - properties:
                  FEATURES:
                    type: array
                    uniqueItems: true
                    items:
                      type: string
                      enum:
                        - locationvariance
                        - loglocationvariance
                        - totaldistance
                        - averagespeed
                        - varspeed
                        - circadianmovement
                        - numberofsignificantplaces
                        - numberlocationtransitions
                        - radiusgyration
                        - timeattop1location
                        - timeattop2location
                        - timeattop3location
                        - movingtostaticratio
                        - outlierstimepercent
                        - maxlengthstayatclusters
                        - minlengthstayatclusters
                        - meanlengthstayatclusters
                        - stdlengthstayatclusters
                        - locationentropy
                        - normalizedlocationentropy
                  ACCURACY_LIMIT:
                    type: integer
                    exclusiveMinimum: 0
        # Any provider key other than DORYAB is checked against the generic
        # PROVIDER definition.
        additionalProperties:
          $ref: "#/definitions/PROVIDER"