Merge branch 'master' of https://github.com/carissalow/rapids
commit
f292ba3bff
|
@ -1,6 +1,7 @@
|
|||
source("packrat/init.R")
|
||||
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
filter_by_day_segment <- function(data, day_segment) {
|
||||
if(day_segment %in% c("morning", "afternoon", "evening", "night"))
|
||||
|
@ -38,4 +39,6 @@ for(requested_feature in requested_features){
|
|||
features <- merge(features, feature, by="local_date", all = TRUE)
|
||||
}
|
||||
|
||||
features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
|
||||
|
||||
write.csv(features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,20 @@
|
|||
# Snakemake entry point for the test pipeline: wires up the shared
# configuration and the rule files that define each stage of the workflow.
configfile: "config.yaml"
include: "../rules/packrat.snakefile"
include: "../rules/preprocessing.snakefile"
include: "../rules/features.snakefile"
include: "../rules/models.snakefile"
include: "../rules/reports.snakefile"
include: "../rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project

# Default target. Listing a file here makes Snakemake run whichever rules
# are required to produce it, for every pid/sensor combination in config.
rule all:
    input:
        expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
        expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
        expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
                            pid=config["PIDS"],
                            sms_type = config["SMS"]["TYPES"],
                            day_segment = config["SMS"]["DAY_SEGMENTS"]),

# Convenience rule: wipe every generated artifact so the pipeline can be
# re-run from scratch.
rule clean:
    shell:
        "rm -rf data/raw/* && rm -rf data/interim/* && rm -rf data/processed/* && rm -rf reports/figures/* && rm -rf reports/*.zip && rm -rf reports/compliance/*"
|
|
@ -0,0 +1,2 @@
|
|||
"local_date","sms_received_daily_countmostfrequentcontact","sms_received_daily_count","sms_received_daily_distinctcontacts","sms_received_daily_timefirstsms","sms_received_daily_timelastsms"
|
||||
"2017-03-27",1,1,1,16,16
|
|
|
@ -0,0 +1,2 @@
|
|||
"local_date","sms_sent_daily_countmostfrequentcontact","sms_sent_daily_count","sms_sent_daily_distinctcontacts","sms_sent_daily_timefirstsms","sms_sent_daily_timelastsms"
|
||||
"2017-05-14",1,1,1,18.10,18.45
|
|
|
@ -0,0 +1,3 @@
|
|||
"timestamp","device_id","message_type","trace"
|
||||
1490644832746,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",1,"8c2037e569c3e5c9574601071b7c8268b3158ff8"
|
||||
1494800874206,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",2,"e3cbb6efdccf0d9e3c961b019ee0bda4f299d22f"
|
|
|
@ -0,0 +1,41 @@
|
|||
import unittest
|
||||
import hashlib
|
||||
import pandas as pd
|
||||
import utils
|
||||
import yaml
|
||||
import os
|
||||
|
||||
class TestSensorFeatures(unittest.TestCase):
    """Integration tests comparing each computed sensor feature file
    against a known-good copy stored under tests/data/processed/.

    Relies on the pipeline having been run beforehand so the actual
    output files exist under data/processed/.
    """

    # Hack to run tests in positional (reverse-alphabetical) order so the
    # existence check runs before the value comparison.
    # NOTE(review): not 100% foolproof -- depends on method naming.
    unittest.TestLoader.sortTestMethodsUsing = lambda self, a, b: (a < b) - (a > b)

    @classmethod
    def setUpClass(cls):
        # Runs once per class: load the testing configuration shared by
        # every test method (module-level global, matching utils' usage).
        global configs
        with open(r'tests/settings/testing_config.yaml') as file:
            configs = yaml.full_load(file)

    def test_sensors_files_exist(self):
        # Every actual output file derived from the config must exist on
        # disk before comparing values makes sense.
        file_lists = utils.generate_sensor_file_lists(configs)
        for each in file_lists:
            for out_file, _ in file_lists[each]:
                # assertTrue is the idiomatic check: os.path.exists
                # returns a bool, not the integer 1.
                self.assertTrue(os.path.exists(out_file))

    def test_sensors_features_calculations(self):
        # Compare each actual CSV with its expected counterpart,
        # cell by cell (dtype- and value-sensitive).
        calc_files = utils.generate_sensor_file_lists(configs)
        for each in calc_files:
            for act_result, exp_result in calc_files[each]:
                df_act = pd.read_csv(act_result)
                df_exp = pd.read_csv(exp_result)
                pd.testing.assert_frame_equal(df_exp, df_act, obj=df_exp)
|
||||
|
||||
|
||||
# Allow running this test module directly (python tests/<module>.py)
# in addition to discovery by a test runner.
if __name__ == '__main__':
    unittest.main()
|
|
@ -0,0 +1,124 @@
|
|||
from snakemake.io import expand
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
import yaml
|
||||
|
||||
def setUp():
    # One-time test environment setup, run once before all tests:
    # stages the fake raw data needed to exercise the rules and script
    # files, clearing any stale output from previous runs.
    #
    # Returns:
    #     dict: the parsed tests/settings/testing_config.yaml.

    # Load the configuration file to get basic parameters
    with open(r'tests/settings/testing_config.yaml') as file:
        configs = yaml.full_load(file)

    # Participant ids whose fixture data should be (re)staged
    pids = configs['PIDS']

    # Reset the test data files for every participant
    for pid in pids:
        # Remove old raw data files if they exist
        despath = os.path.join('data/raw/', pid)
        if os.path.exists(despath) and os.path.isdir(despath):
            shutil.rmtree(despath)

        # Remove old processed files if they exist
        propath = os.path.join('data/processed/', pid)
        if os.path.exists(propath) and os.path.isdir(propath):
            shutil.rmtree(propath)

        # Create a fresh PID data directory for this round of tests.
        # makedirs (vs the original mkdir) also creates data/raw/ itself
        # when it is missing, e.g. on a fresh checkout or after `rule clean`.
        os.makedirs(despath, exist_ok=True)

        # Copy the raw fixture files for this participant into place
        srcpath = os.path.join('tests/data/raw', pid)
        for srcfile in os.listdir(srcpath):
            srcfile_path = os.path.join(srcpath, srcfile)
            desfile = os.path.join(despath, srcfile)
            shutil.copy(srcfile_path, desfile)

    return configs
|
||||
|
||||
|
||||
|
||||
def generate_file_list(configs, sensor):
    # Generates the list of (actual, expected) output file path pairs
    # that would be produced for one sensor, i.e. the sensor passed in.
    #
    # Args:
    #     configs (dict): parsed testing config (PIDS plus per-sensor keys).
    #     sensor (str): lowercase sensor name, e.g. "sms".
    # Returns:
    #     zip: pairs of (actual_path, expected_path); empty when the
    #     sensor's config section lacks DAY_SEGMENTS or FEATURES.

    # Path templates for both expected and actual metric values
    act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
    exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"

    # Default to empty so the final zip() never hits an unbound name when
    # the sensor section is missing the required keys.
    act_file_list = []
    exp_file_list = []

    sensor_cap = sensor.upper()
    # BUG FIX: the original condition was
    #   if 'DAY_SEGMENTS' and 'FEATURES' in configs[sensor_cap]:
    # which only checks FEATURES, because the non-empty string literal
    # 'DAY_SEGMENTS' is always truthy. Both keys must be tested explicitly.
    if 'DAY_SEGMENTS' in configs[sensor_cap] and 'FEATURES' in configs[sensor_cap]:
        # Optional TYPES (e.g. sms received/sent) become a path component
        sensor_type = []
        if 'TYPES' in configs[sensor_cap]:
            for each in configs[sensor_cap]['TYPES']:
                sensor_type.append(each + '_')

        act_file_list = expand(act_str, pid=configs["PIDS"],
                               sensor=sensor,
                               sensor_type=sensor_type,
                               day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

        exp_file_list = expand(exp_str, pid=configs["PIDS"],
                               sensor=sensor,
                               sensor_type=sensor_type,
                               day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

    return zip(act_file_list, exp_file_list)
|
||||
|
||||
|
||||
def generate_sensor_file_lists(configs):
    # Go through the configs and, for each sensor with DAY_SEGMENTS
    # (and optionally TYPES), build the expected files. Returns a
    # dictionary mapping each sensor's config section name (e.g. "SMS")
    # to a list of (actual_path, expected_path) pairs.

    # Path templates for both expected and actual metric values
    act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
    exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"

    # Get all the SENSORS in the config.yaml file
    sensors = configs['SENSORS']
    sensor_file_lists = {}

    # Loop through all sensors and create the actual and expected file paths
    for sensor in sensors:
        # The "messages" database table is configured under the SMS key
        # and produces sms_* output files.
        if sensor == 'messages':
            sensor = 'sms'
            sensor_cap = 'SMS'
        else:
            sensor_cap = sensor.upper()

        if 'DAY_SEGMENTS' in configs[sensor_cap]:
            # Optional TYPES (e.g. received/sent) become a path component
            sensor_type = []
            if 'TYPES' in configs[sensor_cap]:
                for each in configs[sensor_cap]['TYPES']:
                    sensor_type.append(each + '_')

            # Collapse the original duplicated expand branches: when the
            # sensor has no TYPES, expand with [''] -- for expand() a
            # one-element [''] list is equivalent to sensor_type=''.
            if not sensor_type:
                sensor_type = ['']

            act_file_list = expand(act_str, pid=configs["PIDS"],
                                   sensor=sensor,
                                   sensor_type=sensor_type,
                                   day_segment=configs[sensor_cap]["DAY_SEGMENTS"])
            exp_file_list = expand(exp_str, pid=configs["PIDS"],
                                   sensor=sensor,
                                   sensor_type=sensor_type,
                                   day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

            sensor_file_lists[sensor_cap] = list(zip(act_file_list, exp_file_list))

    return sensor_file_lists
|
|
@ -0,0 +1,5 @@
|
|||
directory: ./
|
||||
configfile: ./tests/settings/testing_config.yaml
|
||||
snakefile: ./tests/Snakefile
|
||||
cores: 1
|
||||
forcerun: sms_features
|
|
@ -0,0 +1,19 @@
|
|||
# Valid database table name
|
||||
SENSORS: [messages]
|
||||
|
||||
# Test Participant data to include in the unit testing
|
||||
# You must create a file for each participant
|
||||
# named pXXX containing their device_id
|
||||
PIDS: [test01]
|
||||
|
||||
# Global var with common day segments
|
||||
DAY_SEGMENTS: &day_segments
|
||||
[daily]
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
SMS:
|
||||
TYPES : [received, sent]
|
||||
FEATURES:
|
||||
received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
|
||||
sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
|
||||
DAY_SEGMENTS: *day_segments
|
Loading…
Reference in New Issue