diff --git a/src/features/bluetooth_features.R b/src/features/bluetooth_features.R index 8027dd10..750dbd53 100644 --- a/src/features/bluetooth_features.R +++ b/src/features/bluetooth_features.R @@ -1,6 +1,7 @@ source("packrat/init.R") library(dplyr) +library(tidyr) filter_by_day_segment <- function(data, day_segment) { if(day_segment %in% c("morning", "afternoon", "evening", "night")) @@ -38,4 +39,6 @@ for(requested_feature in requested_features){ features <- merge(features, feature, by="local_date", all = TRUE) } +features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0))) + write.csv(features, snakemake@output[[1]], row.names = FALSE) \ No newline at end of file diff --git a/tests/Snakefile b/tests/Snakefile new file mode 100644 index 00000000..860caecb --- /dev/null +++ b/tests/Snakefile @@ -0,0 +1,20 @@ +configfile: "config.yaml" +include: "../rules/packrat.snakefile" +include: "../rules/preprocessing.snakefile" +include: "../rules/features.snakefile" +include: "../rules/models.snakefile" +include: "../rules/reports.snakefile" +include: "../rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project +rule all: + input: + expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]), + expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv", + pid=config["PIDS"], + sms_type = config["SMS"]["TYPES"], + day_segment = config["SMS"]["DAY_SEGMENTS"]), + +rule clean: + shell: + "rm -rf data/raw/* && rm -rf data/interim/* && rm -rf data/processed/* && rm -rf reports/figures/* && rm -rf reports/*.zip && rm -rf reports/compliance/*" \ No newline at end of file diff --git a/tests/data/interim/.gitkeep b/tests/data/interim/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/processed/test01/sms_received_daily.csv
b/tests/data/processed/test01/sms_received_daily.csv new file mode 100644 index 00000000..acbf1bfb --- /dev/null +++ b/tests/data/processed/test01/sms_received_daily.csv @@ -0,0 +1,2 @@ +"local_date","sms_received_daily_countmostfrequentcontact","sms_received_daily_count","sms_received_daily_distinctcontacts","sms_received_daily_timefirstsms","sms_received_daily_timelastsms" +"2017-03-27",1,1,1,16,16 diff --git a/tests/data/processed/test01/sms_sent_daily.csv b/tests/data/processed/test01/sms_sent_daily.csv new file mode 100644 index 00000000..2b44c1f1 --- /dev/null +++ b/tests/data/processed/test01/sms_sent_daily.csv @@ -0,0 +1,2 @@ +"local_date","sms_sent_daily_countmostfrequentcontact","sms_sent_daily_count","sms_sent_daily_distinctcontacts","sms_sent_daily_timefirstsms","sms_sent_daily_timelastsms" +"2017-05-14",1,1,1,18.10,18.45 diff --git a/tests/data/raw/.gitkeep b/tests/data/raw/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/raw/test01/messages_raw.csv b/tests/data/raw/test01/messages_raw.csv new file mode 100644 index 00000000..41d440d3 --- /dev/null +++ b/tests/data/raw/test01/messages_raw.csv @@ -0,0 +1,3 @@ +"timestamp","device_id","message_type","trace" +1490644832746,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",1,"8c2037e569c3e5c9574601071b7c8268b3158ff8" +1494800874206,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",2,"e3cbb6efdccf0d9e3c961b019ee0bda4f299d22f" diff --git a/tests/scripts/test_sensor_features.py b/tests/scripts/test_sensor_features.py new file mode 100644 index 00000000..6cbadaca --- /dev/null +++ b/tests/scripts/test_sensor_features.py @@ -0,0 +1,41 @@ +import unittest +import hashlib +import pandas as pd +import utils +import yaml +import os + +class TestSensorFeatures(unittest.TestCase): + + # Hack to run code in positional order (not 100% foolproof) + unittest.TestLoader.sortTestMethodsUsing = lambda self, a, b: (a < b) - (a > b) + + @classmethod + def setUpClass(cls): + # Runs once to set up the environment + global configs +
with open(r'tests/settings/testing_config.yaml') as file: + configs = yaml.full_load(file) + + + def test_sensors_files_exist(self): + # Loop through the file_list dictionary and check if the files exist. + + file_lists = utils.generate_sensor_file_lists(configs) + for each in file_lists: + for out_file, _ in file_lists[each]: + self.assertEqual(os.path.exists(out_file), 1) + + + def test_sensors_features_calculations(self): + calc_files = utils.generate_sensor_file_lists(configs) + for each in calc_files: + for act_result, exp_result in calc_files[each]: + df_act = pd.read_csv(act_result) + df_exp = pd.read_csv(exp_result) + pd.testing.assert_frame_equal(df_exp, df_act, obj=df_exp) + + +if __name__ == '__main__': + + unittest.main() \ No newline at end of file diff --git a/tests/scripts/utils.py b/tests/scripts/utils.py new file mode 100644 index 00000000..ca8c9e16 --- /dev/null +++ b/tests/scripts/utils.py @@ -0,0 +1,124 @@ +from snakemake.io import expand +import os +import subprocess +import shutil +import yaml + +def setUp(): + # This utility setUp is intended to be run once before all tests are run + # It is intended to set up all the necessary fake data in order to test + # the rules and script files.
+ + # Load the configuration file to get basic parameters + with open(r'tests/settings/testing_config.yaml') as file: + configs = yaml.full_load(file) + + # Get the settings + pids = configs['PIDS'] + + + # Reset the test data files + for pid in pids: + # Remove old data files if they exist + despath = os.path.join('data/raw/', pid) + if os.path.exists(despath) and os.path.isdir(despath): + shutil.rmtree(despath) + + # Remove old processed files if they exist + propath = os.path.join('data/processed/', pid) + if os.path.exists(propath) and os.path.isdir(propath): + shutil.rmtree(propath) + + # Create a fresh PID data directory necessary for this round of tests + os.mkdir(despath) + + # Copy necessary data files + srcpath = os.path.join('tests/data/raw', pid) + srcfiles = os.listdir(srcpath) + for srcfile in srcfiles: + srcfile_path = os.path.join(srcpath, srcfile) + desfile = os.path.join(despath, srcfile) + shutil.copy(srcfile_path, desfile) + + return configs + + + +def generate_file_list(configs, sensor): + # Generates the list of files that would be produced for one sensor + # i.e. The sensor passed into the function.
+ + # Initialize string of file path for both expected and actual metric values + act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv" + exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv" + + sensor_cap = sensor.upper() + if 'DAY_SEGMENTS' and 'FEATURES' in configs[sensor_cap]: + sensor_type = [] + if 'TYPES' in configs[sensor_cap]: + for each in configs[sensor_cap]['TYPES']: + sensor_type.append(each+'_') + + + act_file_list = expand(act_str,pid=configs["PIDS"], + sensor = sensor, + sensor_type = sensor_type, + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + + exp_file_list = expand(exp_str,pid=configs["PIDS"], + sensor = sensor, + sensor_type = sensor_type, + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + + return zip(act_file_list, exp_file_list) + + +def generate_sensor_file_lists(configs): + # Go through the configs and select those sensors with DAY_SEGMENTS, + # optionally TYPES, then create expected files. Return a dictionary with + # list of file paths of expected and actual files for each sensor + # listed in the config file.
+ + # Initialize string of file path for both expected and actual metric values + act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv" + exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv" + + # Get all the SENSORS in the config.yaml files + sensors = configs['SENSORS'] + sensor_file_lists = {} + + # Loop through all sensors and create the actual and expected file paths + for sensor in sensors: + if sensor == 'messages': + sensor = 'sms' + sensor_cap = 'SMS' + else: + sensor_cap = sensor.upper() + if 'DAY_SEGMENTS' in configs[sensor_cap]: + sensor_type = [] + if 'TYPES' in configs[sensor_cap]: + for each in configs[sensor_cap]['TYPES']: + sensor_type.append(each+'_') + + if sensor_type: + act_file_list = expand(act_str, pid=configs["PIDS"], + sensor = sensor, + sensor_type = sensor_type, + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + exp_file_list = expand(exp_str, pid=configs["PIDS"], + sensor = sensor, + sensor_type = sensor_type, + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + else: + act_file_list = expand(act_str, pid=configs["PIDS"], + sensor = sensor, + sensor_type = '', + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + exp_file_list = expand(exp_str, pid=configs["PIDS"], + sensor = sensor, + sensor_type = '', + day_segment = configs[sensor_cap]["DAY_SEGMENTS"]) + + sensor_file_lists[sensor_cap] = list(zip(act_file_list,exp_file_list)) + + return sensor_file_lists diff --git a/tests/settings/config.yaml b/tests/settings/config.yaml new file mode 100644 index 00000000..fb54276c --- /dev/null +++ b/tests/settings/config.yaml @@ -0,0 +1,5 @@ +directory: ./ +configfile: ./tests/settings/testing_config.yaml +snakefile: ./tests/Snakefile +cores: 1 +forcerun: sms_features diff --git a/tests/settings/testing_config.yaml b/tests/settings/testing_config.yaml new file mode 100644 index 00000000..80a94180 --- /dev/null +++ b/tests/settings/testing_config.yaml @@ -0,0 +1,19 @@ +# Valid database table name
+SENSORS: [messages] + +# Test Participant data to include in the unit testing +# You must create a file for each participant +# named pXXX containing their device_id +PIDS: [test01] + +# Global var with common day segments +DAY_SEGMENTS: &day_segments + [daily] + +# Communication SMS features config, TYPES and FEATURES keys need to match +SMS: + TYPES : [received, sent] + FEATURES: + received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact] + sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact] + DAY_SEGMENTS: *day_segments