Merge branch 'master' of https://github.com/carissalow/rapids
commit
f292ba3bff
|
@ -1,6 +1,7 @@
|
|||
source("packrat/init.R")
|
||||
|
||||
library(dplyr)
|
||||
library(tidyr)
|
||||
|
||||
filter_by_day_segment <- function(data, day_segment) {
|
||||
if(day_segment %in% c("morning", "afternoon", "evening", "night"))
|
||||
|
@ -38,4 +39,6 @@ for(requested_feature in requested_features){
|
|||
features <- merge(features, feature, by="local_date", all = TRUE)
|
||||
}
|
||||
|
||||
features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
|
||||
|
||||
write.csv(features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,20 @@
|
|||
# Snakemake entry point for the test pipeline: wires up the shared
# configuration and the rule files that define each stage of the workflow.
configfile: "config.yaml"
include: "../rules/packrat.snakefile"
include: "../rules/preprocessing.snakefile"
include: "../rules/features.snakefile"
include: "../rules/models.snakefile"
include: "../rules/reports.snakefile"
include: "../rules/mystudy.snakefile" # You can add snakefiles with rules tailored to your project

# Default target. Listing a file here makes Snakemake run whichever rules
# are required to produce it, for every pid/sensor combination in config.
rule all:
    input:
        expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
        expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
        expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
                            pid=config["PIDS"],
                            sms_type = config["SMS"]["TYPES"],
                            day_segment = config["SMS"]["DAY_SEGMENTS"]),

# Convenience rule: wipe every generated artifact so the pipeline can be
# re-run from scratch.
rule clean:
    shell:
        "rm -rf data/raw/* && rm -rf data/interim/* && rm -rf data/processed/* && rm -rf reports/figures/* && rm -rf reports/*.zip && rm -rf reports/compliance/*"
|
|
@ -0,0 +1,2 @@
|
|||
"local_date","sms_received_daily_countmostfrequentcontact","sms_received_daily_count","sms_received_daily_distinctcontacts","sms_received_daily_timefirstsms","sms_received_daily_timelastsms"
|
||||
"2017-03-27",1,1,1,16,16
|
|
|
@ -0,0 +1,2 @@
|
|||
"local_date","sms_sent_daily_countmostfrequentcontact","sms_sent_daily_count","sms_sent_daily_distinctcontacts","sms_sent_daily_timefirstsms","sms_sent_daily_timelastsms"
|
||||
"2017-05-14",1,1,1,18.10,18.45
|
|
|
@ -0,0 +1,3 @@
|
|||
"timestamp","device_id","message_type","trace"
|
||||
1490644832746,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",1,"8c2037e569c3e5c9574601071b7c8268b3158ff8"
|
||||
1494800874206,"00cf6eff-7d67-4b7d-be0c-9e7596c9640f",2,"e3cbb6efdccf0d9e3c961b019ee0bda4f299d22f"
|
|
|
@ -0,0 +1,41 @@
|
|||
import unittest
|
||||
import hashlib
|
||||
import pandas as pd
|
||||
import utils
|
||||
import yaml
|
||||
import os
|
||||
|
||||
class TestSensorFeatures(unittest.TestCase):
    """Integration tests comparing each computed sensor feature file
    against a known-good copy stored under tests/data/processed/.

    Relies on the pipeline having been run beforehand so the actual
    output files exist under data/processed/.
    """

    # Hack to run tests in positional (reverse-alphabetical) order so the
    # existence check runs before the value comparison.
    # NOTE(review): not 100% foolproof -- depends on method naming.
    unittest.TestLoader.sortTestMethodsUsing = lambda self, a, b: (a < b) - (a > b)

    @classmethod
    def setUpClass(cls):
        # Runs once per class: load the testing configuration shared by
        # every test method (module-level global, matching utils' usage).
        global configs
        with open(r'tests/settings/testing_config.yaml') as file:
            configs = yaml.full_load(file)

    def test_sensors_files_exist(self):
        # Every actual output file derived from the config must exist on
        # disk before comparing values makes sense.
        file_lists = utils.generate_sensor_file_lists(configs)
        for each in file_lists:
            for out_file, _ in file_lists[each]:
                # assertTrue is the idiomatic check: os.path.exists
                # returns a bool, not the integer 1.
                self.assertTrue(os.path.exists(out_file))

    def test_sensors_features_calculations(self):
        # Compare each actual CSV with its expected counterpart,
        # cell by cell (dtype- and value-sensitive).
        calc_files = utils.generate_sensor_file_lists(configs)
        for each in calc_files:
            for act_result, exp_result in calc_files[each]:
                df_act = pd.read_csv(act_result)
                df_exp = pd.read_csv(exp_result)
                pd.testing.assert_frame_equal(df_exp, df_act, obj=df_exp)
|
||||
|
||||
|
||||
# Allow running this test module directly (python tests/<module>.py)
# in addition to discovery by a test runner.
if __name__ == '__main__':
    unittest.main()
|
|
@ -0,0 +1,124 @@
|
|||
from snakemake.io import expand
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
import yaml
|
||||
|
||||
def setUp():
    # One-time test environment setup, run once before all tests:
    # stages the fake raw data needed to exercise the rules and script
    # files, clearing any stale output from previous runs.
    #
    # Returns:
    #     dict: the parsed tests/settings/testing_config.yaml.

    # Load the configuration file to get basic parameters
    with open(r'tests/settings/testing_config.yaml') as file:
        configs = yaml.full_load(file)

    # Participant ids whose fixture data should be (re)staged
    pids = configs['PIDS']

    # Reset the test data files for every participant
    for pid in pids:
        # Remove old raw data files if they exist
        despath = os.path.join('data/raw/', pid)
        if os.path.exists(despath) and os.path.isdir(despath):
            shutil.rmtree(despath)

        # Remove old processed files if they exist
        propath = os.path.join('data/processed/', pid)
        if os.path.exists(propath) and os.path.isdir(propath):
            shutil.rmtree(propath)

        # Create a fresh PID data directory for this round of tests.
        # makedirs (vs the original mkdir) also creates data/raw/ itself
        # when it is missing, e.g. on a fresh checkout or after `rule clean`.
        os.makedirs(despath, exist_ok=True)

        # Copy the raw fixture files for this participant into place
        srcpath = os.path.join('tests/data/raw', pid)
        for srcfile in os.listdir(srcpath):
            srcfile_path = os.path.join(srcpath, srcfile)
            desfile = os.path.join(despath, srcfile)
            shutil.copy(srcfile_path, desfile)

    return configs
|
||||
|
||||
|
||||
|
||||
def generate_file_list(configs, sensor):
    # Generates the list of (actual, expected) output file path pairs
    # that would be produced for one sensor, i.e. the sensor passed in.
    #
    # Args:
    #     configs (dict): parsed testing config (PIDS plus per-sensor keys).
    #     sensor (str): lowercase sensor name, e.g. "sms".
    # Returns:
    #     zip: pairs of (actual_path, expected_path); empty when the
    #     sensor's config section lacks DAY_SEGMENTS or FEATURES.

    # Path templates for both expected and actual metric values
    act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
    exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"

    # Default to empty so the final zip() never hits an unbound name when
    # the sensor section is missing the required keys.
    act_file_list = []
    exp_file_list = []

    sensor_cap = sensor.upper()
    # BUG FIX: the original condition was
    #   if 'DAY_SEGMENTS' and 'FEATURES' in configs[sensor_cap]:
    # which only checks FEATURES, because the non-empty string literal
    # 'DAY_SEGMENTS' is always truthy. Both keys must be tested explicitly.
    if 'DAY_SEGMENTS' in configs[sensor_cap] and 'FEATURES' in configs[sensor_cap]:
        # Optional TYPES (e.g. sms received/sent) become a path component
        sensor_type = []
        if 'TYPES' in configs[sensor_cap]:
            for each in configs[sensor_cap]['TYPES']:
                sensor_type.append(each + '_')

        act_file_list = expand(act_str, pid=configs["PIDS"],
                               sensor=sensor,
                               sensor_type=sensor_type,
                               day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

        exp_file_list = expand(exp_str, pid=configs["PIDS"],
                               sensor=sensor,
                               sensor_type=sensor_type,
                               day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

    return zip(act_file_list, exp_file_list)
|
||||
|
||||
|
||||
def generate_sensor_file_lists(configs):
    # Go through the configs and, for each sensor with DAY_SEGMENTS
    # (and optionally TYPES), build the expected files. Returns a
    # dictionary mapping each sensor's config section name (e.g. "SMS")
    # to a list of (actual_path, expected_path) pairs.

    # Path templates for both expected and actual metric values
    act_str = "data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"
    exp_str = "tests/data/processed/{pid}/{sensor}_{sensor_type}{day_segment}.csv"

    # Get all the SENSORS in the config.yaml file
    sensors = configs['SENSORS']
    sensor_file_lists = {}

    # Loop through all sensors and create the actual and expected file paths
    for sensor in sensors:
        # The "messages" database table is configured under the SMS key
        # and produces sms_* output files.
        if sensor == 'messages':
            sensor = 'sms'
            sensor_cap = 'SMS'
        else:
            sensor_cap = sensor.upper()

        if 'DAY_SEGMENTS' in configs[sensor_cap]:
            # Optional TYPES (e.g. received/sent) become a path component
            sensor_type = []
            if 'TYPES' in configs[sensor_cap]:
                for each in configs[sensor_cap]['TYPES']:
                    sensor_type.append(each + '_')

            # Collapse the original duplicated expand branches: when the
            # sensor has no TYPES, expand with [''] -- for expand() a
            # one-element [''] list is equivalent to sensor_type=''.
            if not sensor_type:
                sensor_type = ['']

            act_file_list = expand(act_str, pid=configs["PIDS"],
                                   sensor=sensor,
                                   sensor_type=sensor_type,
                                   day_segment=configs[sensor_cap]["DAY_SEGMENTS"])
            exp_file_list = expand(exp_str, pid=configs["PIDS"],
                                   sensor=sensor,
                                   sensor_type=sensor_type,
                                   day_segment=configs[sensor_cap]["DAY_SEGMENTS"])

            sensor_file_lists[sensor_cap] = list(zip(act_file_list, exp_file_list))

    return sensor_file_lists
|
|
@ -0,0 +1,5 @@
|
|||
directory: ./
|
||||
configfile: ./tests/settings/testing_config.yaml
|
||||
snakefile: ./tests/Snakefile
|
||||
cores: 1
|
||||
forcerun: sms_features
|
|
@ -0,0 +1,19 @@
|
|||
# Valid database table name
|
||||
SENSORS: [messages]
|
||||
|
||||
# Test Participant data to include in the unit testing
|
||||
# You must create a file for each participant
|
||||
# named pXXX containing their device_id
|
||||
PIDS: [test01]
|
||||
|
||||
# Global var with common day segments
|
||||
DAY_SEGMENTS: &day_segments
|
||||
[daily]
|
||||
|
||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||
SMS:
|
||||
TYPES : [received, sent]
|
||||
FEATURES:
|
||||
received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
|
||||
sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
|
||||
DAY_SEGMENTS: *day_segments
|
Loading…
Reference in New Issue