Refactor Snakefile and docs. Rename SMS scripts

pull/95/head
JulioV 2020-06-23 11:33:34 -04:00
parent 9da4fb165c
commit 36017d5dca
13 changed files with 363 additions and 463 deletions

Snakefile

@ -6,174 +6,97 @@ include: "rules/models.snakefile"
include: "rules/reports.snakefile"
include: "rules/mystudy.snakefile" # You can add snakfiles with rules tailored to your project
models, scalers = [], []
for model_name in config["PARAMS_FOR_ANALYSIS"]["MODEL_NAMES"]:
models = models + [model_name] * len(config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name])
scalers = scalers + config["PARAMS_FOR_ANALYSIS"]["MODEL_SCALER"][model_name]
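# Illustration with hypothetical config values (not part of this commit): if MODEL_NAMES
# were ["LogReg", "RF"] and MODEL_SCALER were {"LogReg": ["minmaxscaler", "notnormalized"],
# "RF": ["notnormalized"]}, the loop above would produce models = ["LogReg", "LogReg", "RF"]
# and scalers = ["minmaxscaler", "notnormalized", "notnormalized"], so that
# expand(..., zip, model=models, scaler=scalers) pairs each model with each of its scalers
# instead of taking their full cross product.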
files_to_compute = []
if len(config["PIDS"]) == 0:
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
if config["MESSAGES"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/messages_{messages_type}_{day_segment}.csv", pid=config["PIDS"], messages_type = config["MESSAGES"]["TYPES"], day_segment = config["MESSAGES"]["DAY_SEGMENTS"]))
if config["CALLS"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/calls_{call_type}_{segment}.csv", pid=config["PIDS"], call_type=config["CALLS"]["TYPES"], segment = config["CALLS"]["DAY_SEGMENTS"]))
if config["BARNETT_LOCATION"]["COMPUTE"]:
# TODO add files_to_compute.extend(optional_location_input(None))
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED" and config["BARNETT_LOCATION"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BARNETT_LOCATION"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/location_barnett_{segment}.csv", pid=config["PIDS"], segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]))
if config["BLUETOOTH"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/bluetooth_{segment}.csv", pid=config["PIDS"], segment = config["BLUETOOTH"]["DAY_SEGMENTS"]))
if config["ACTIVITY_RECOGNITION"]["COMPUTE"]:
# TODO add files_to_compute.extend(optional_ar_input(None)), the Android or iOS table gets processed depending on each participant
files_to_compute.extend(expand("data/processed/{pid}/activity_recognition_{segment}.csv",pid=config["PIDS"], segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]))
if config["BATTERY"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/battery_deltas.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/{pid}/battery_{day_segment}.csv", pid = config["PIDS"], day_segment = config["BATTERY"]["DAY_SEGMENTS"]))
if config["SCREEN"]["COMPUTE"]:
if config["SCREEN"]["DB_TABLE"] not in config["TABLES_FOR_SENSED_BINS"]:
raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to TABLES_FOR_SENSED_BINS in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
if config["LIGHT"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/light_{day_segment}.csv", pid = config["PIDS"], day_segment = config["LIGHT"]["DAY_SEGMENTS"]))
if config["ACCELEROMETER"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/accelerometer_{day_segment}.csv", pid = config["PIDS"], day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]))
if config["APPLICATIONS_FOREGROUND"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
files_to_compute.extend(expand("data/interim/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/applications_foreground_{day_segment}.csv", pid = config["PIDS"], day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]))
if config["WIFI"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]))
files_to_compute.extend(expand("data/processed/{pid}/wifi_{day_segment}.csv", pid = config["PIDS"], day_segment = config["WIFI"]["DAY_SEGMENTS"]))
if config["HEARTRATE"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
files_to_compute.extend(expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv", pid = config["PIDS"], day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]))
if config["STEP"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["STEP"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_step_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/processed/{pid}/fitbit_step_{day_segment}.csv", pid = config["PIDS"], day_segment = config["STEP"]["DAY_SEGMENTS"]))
if config["SLEEP"]["COMPUTE"]:
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SLEEP"]["DB_TABLE"]))
files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday"]))
files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
if config["CONVERSATION"]["COMPUTE"]:
# TODO add files_to_compute.extend(optional_conversation_input(None)), the Android or iOS table gets processed depending on each participant
files_to_compute.extend(expand("data/processed/{pid}/conversation_{segment}.csv",pid=config["PIDS"], segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
rule all:
input:
# My study (this is an example of a rule created specifically for a study)
expand("data/interim/{pid}/days_to_analyse_{days_before_surgery}_{days_in_hospital}_{days_after_discharge}.csv",
pid = config["PIDS"],
days_before_surgery = config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_BEFORE_SURGERY"],
days_after_discharge = config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_AFTER_DISCHARGE"],
days_in_hospital = config["PARAMS_FOR_ANALYSIS"]["DAYS_TO_ANALYSE"]["DAYS_IN_HOSPITAL"]),
expand("data/processed/{pid}/targets_{summarised}.csv",
pid = config["PIDS"],
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
expand("data/processed/{pid}/demographic_features.csv", pid=config["PIDS"]),
# Feature extraction
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["FITBIT_TABLE"]),
expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
expand("data/processed/{pid}/battery_deltas.csv", pid=config["PIDS"]),
expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", pid=config["PIDS"]),
expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]),
expand("data/processed/{pid}/plugin_google_activity_recognition_deltas.csv", pid=config["PIDS"]),
expand("data/interim/{pid}/phone_valid_sensed_days.csv", pid=config["PIDS"]),
expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]),
expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
pid=config["PIDS"],
sms_type = config["SMS"]["TYPES"],
day_segment = config["SMS"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/call_{call_type}_{segment}.csv",
pid=config["PIDS"],
call_type=config["CALLS"]["TYPES"],
segment = config["CALLS"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/location_barnett_{segment}.csv",
pid=config["PIDS"],
segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/bluetooth_{segment}.csv",
pid=config["PIDS"],
segment = config["BLUETOOTH"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/activity_recognition_{segment}.csv",pid=config["PIDS"],
segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/battery_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["BATTERY"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/screen_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/light_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["LIGHT"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/conversation_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["CONVERSATION"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/accelerometer_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_{fitbit_data_type}_with_datetime.csv",
pid=config["PIDS"],
fitbit_sensor=config["FITBIT_SENSORS"],
fitbit_data_type=config["FITBIT_DATA_TYPE"]),
expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["STEP"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv",
pid = config["PIDS"],
day_segment = config["SLEEP"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/wifi_{segment}.csv",
pid=config["PIDS"],
segment = config["WIFI"]["DAY_SEGMENTS"]),
# Models
expand("data/processed/{pid}/data_for_individual_model/{source}_{day_segment}_original.csv",
pid = config["PIDS"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
expand("data/processed/data_for_population_model/{source}_{day_segment}_original.csv",
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
expand("data/processed/{pid}/data_for_individual_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
pid = config["PIDS"],
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_clean.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
expand("data/processed/data_for_population_model/demographic_features.csv"),
expand("data/processed/data_for_population_model/targets_{summarised}.csv",
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_nancellsratio.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"]),
expand("data/processed/data_for_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{source}_{day_segment}_{summarised}_{cv_method}_baseline.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
expand(
expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/{result_component}.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"],
result_component = config["PARAMS_FOR_ANALYSIS"]["RESULT_COMPONENTS"]),
zip,
model = models,
scaler = scalers),
expand(
expand("data/processed/output_population_model/{rows_nan_threshold}|{cols_nan_threshold}_{days_before_threshold}|{days_after_threshold}_{cols_var_threshold}/{{model}}/{cv_method}/{source}_{day_segment}_{summarised}_{{scaler}}/merged_population_model_results.csv",
rows_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["ROWS_NAN_THRESHOLD"],
cols_nan_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_NAN_THRESHOLD"],
days_before_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_BEFORE_THRESHOLD"],
days_after_threshold = config["PARAMS_FOR_ANALYSIS"]["PARTICIPANT_DAYS_AFTER_THRESHOLD"],
cols_var_threshold = config["PARAMS_FOR_ANALYSIS"]["COLS_VAR_THRESHOLD"],
cv_method = config["PARAMS_FOR_ANALYSIS"]["CV_METHODS"],
source = config["PARAMS_FOR_ANALYSIS"]["SOURCES"],
day_segment = config["PARAMS_FOR_ANALYSIS"]["DAY_SEGMENTS"],
summarised = config["PARAMS_FOR_ANALYSIS"]["SUMMARISED"]),
zip,
model = models,
scaler = scalers),
# Vizualisations
expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
expand("reports/figures/{pid}/battery_consumption_rates_barchart.html", pid=config["PIDS"]),
expand("reports/compliance/{pid}/compliance_report.html", pid=config["PIDS"]),
expand("reports/figures/overall_compliance_heatmap.html"),
files_to_compute
rule clean:
shell:


@ -1,14 +1,10 @@
# Valid database table names
SENSORS: [applications_crashes, applications_foreground, applications_notifications, battery, bluetooth, calls, locations, messages, plugin_ambient_noise, plugin_device_usage, plugin_google_activity_recognition, plugin_ios_activity_recognition, screen,plugin_studentlife_audio]
FITBIT_TABLE: [fitbit_data]
FITBIT_SENSORS: [heartrate, steps, sleep, calories]
FITBIT_DATA_TYPE: [summary, intraday]
# Add as many sensor tables as you have, they all improve the computation of PHONE_SENSED_BINS.
# If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
TABLES_FOR_SENSED_BINS: []
# Participants to include in the analysis
# You must create a file for each participant
# named pXXX containing their device_id
PIDS: [p01, p02]
# You must create a file for each participant named pXXX containing their device_id. This can be done manually or automatically
PIDS: []
# Global var with common day segments
DAY_SEGMENTS: &day_segments
@ -36,7 +32,9 @@ READABLE_DATETIME:
FIXED_TIMEZONE: *timezone
# Communication SMS features config, TYPES and FEATURES keys need to match
SMS:
MESSAGES:
COMPUTE: False
DB_TABLE: messages
TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
@ -45,6 +43,8 @@ SMS:
# Communication call features config, TYPES and FEATURES keys need to match
CALLS:
COMPUTE: False
DB_TABLE: calls
TYPES: [missed, incoming, outgoing]
FEATURES:
missed: [count, distinctcontacts, timefirstcall, timelastcall, countmostfrequentcontact]
@ -69,36 +69,52 @@ RESAMPLE_FUSED_LOCATION:
TIMEZONE: *timezone
BARNETT_LOCATION:
COMPUTE: False
DB_TABLE: locations
DAY_SEGMENTS: [daily] # These features are only available on a daily basis
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
LOCATIONS_TO_USE: ALL # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
LOCATIONS_TO_USE: RESAMPLE_FUSED # ALL, ALL_EXCEPT_FUSED OR RESAMPLE_FUSED
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
TIMEZONE: *timezone
MINUTES_DATA_USED: False # Use this for quality control purposes to check how many minutes of data (location coordinates grouped by minute) were used to compute features
BLUETOOTH:
COMPUTE: False
DB_TABLE: bluetooth
DAY_SEGMENTS: *day_segments
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
ACTIVITY_RECOGNITION:
COMPUTE: False
DB_TABLE:
ANDROID: plugin_google_activity_recognition
IOS: plugin_ios_activity_recognition
DAY_SEGMENTS: *day_segments
FEATURES: ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
BATTERY:
COMPUTE: False
DB_TABLE: battery
DAY_SEGMENTS: *day_segments
FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
SCREEN:
COMPUTE: False
DB_TABLE: screen
DAY_SEGMENTS: *day_segments
REFERENCE_HOUR_FIRST_USE: 0
FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
EPISODE_TYPES: ["unlock"]
LIGHT:
COMPUTE: False
DB_TABLE: light
DAY_SEGMENTS: *day_segments
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
ACCELEROMETER:
COMPUTE: False
DB_TABLE: accelerometer
DAY_SEGMENTS: *day_segments
FEATURES:
MAGNITUDE: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
@ -107,6 +123,8 @@ ACCELEROMETER:
VALID_SENSED_MINUTES: True
APPLICATIONS_FOREGROUND:
COMPUTE: False
DB_TABLE: applications_foreground
DAY_SEGMENTS: *day_segments
SINGLE_CATEGORIES: ["all", "email"]
MULTIPLE_CATEGORIES:
@ -118,12 +136,15 @@ APPLICATIONS_FOREGROUND:
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
HEARTRATE:
COMPUTE: False
DB_TABLE: fitbit_data
DAY_SEGMENTS: *day_segments
# Only daily features are extracted from summary data
SUMMARY_FEATURES: ["restinghr"] # calories related features might be inaccurate: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
SUMMARY_FEATURES: ["restinghr"] # calories features' accuracy depend on the accuracy of the participants fitbit profile (e.g. heigh, weight) use with care: ["caloriesoutofrange", "caloriesfatburn", "caloriescardio", "caloriespeak"]
INTRADAY_FEATURES: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "minutesonoutofrangezone", "minutesonfatburnzone", "minutesoncardiozone", "minutesonpeakzone"]
STEP:
COMPUTE: False
DB_TABLE: fitbit_data
DAY_SEGMENTS: *day_segments
FEATURES:
ALL_STEPS: ["sumallsteps", "maxallsteps", "minallsteps", "avgallsteps", "stdallsteps"]
@ -133,16 +154,23 @@ STEP:
INCLUDE_ZERO_STEP_ROWS: True
SLEEP:
COMPUTE: False
DB_TABLE: fitbit_data
DAY_SEGMENTS: *day_segments
SLEEP_TYPES: ["main", "nap", "all"]
# Only daily features are extracted from summary data
SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]
WIFI:
COMPUTE: False
DB_TABLE: wifi
DAY_SEGMENTS: *day_segments
FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
CONVERSATION:
COMPUTE: False
DB_TABLE:
ANDROID: plugin_studentlife_audio_android
IOS: plugin_studentlife_audio
DAY_SEGMENTS: *day_segments
FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
@ -152,6 +180,8 @@ CONVERSATION:
RECORDINGMINUTES: 1
PAUSEDMINUTES : 3
### Analysis ################################################################
PARAMS_FOR_ANALYSIS:
GROUNDTRUTH_TABLE: participant_info
SOURCES: &sources ["phone_features", "fitbit_features", "phone_fitbit_features"]


@ -3,12 +3,23 @@
RAPIDS Features
===============
*How do I compute any of these features?* In your ``config.yaml``, go to the sensor section you are interested in and set the corresponding ``COMPUTE`` option to ``TRUE`` as well as ``DB_TABLE`` to the sensor's table name in your database (the default table name is the one assigned by Aware), for example:
| ``MESSAGES:``
| ``COMPUTE: True``
| ``DB_TABLE: messages``
| ``...``
If you want to extract phone_valid_sensed_days.csv, screen features or location features based on fused location data, don't forget to configure ``TABLES_FOR_SENSED_BINS`` (see below).
.. _global-sensor-doc:
Global Parameters
"""""""""""""""""
.. _sensor-list:
- ``SENSORS`` - List of sensors to include in the pipeline that have to match existent tables in your AWARE_ database. See SENSORS_ variable in ``config`` file.
- ``TABLES_FOR_SENSED_BINS`` - Add as many sensor tables as you have in your database. All sensors included are used to compute ``phone_sensed_bins.csv`` (bins of time when the smartphone was sensing data). In turn, these bins are used to compute ``PHONE_VALID_SENSED_DAYS`` (see below), ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``. See TABLES_FOR_SENSED_BINS_ variable in ``config`` file (therefore, when you are extracting screen or Barnett's location features, screen and locations tables are mandatory).
.. _fitbit-table:
@ -53,34 +64,28 @@ Global Parameters
Contains three attributes: ``BIN_SIZE``, ``MIN_VALID_HOURS``, ``MIN_BINS_PER_HOUR``.
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data (for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable measurement compared to 10 calls on a day when 23 hours of data were recorded.
On any given day, Aware could have sensed data only for a few minutes or for 24 hours. Daily estimates of features should be considered more reliable the more hours Aware was running and logging data (for example, 10 calls logged on a day when only one hour of data was recorded is a less reliable feature compared to 10 calls on a day when 23 hours of data were recorded).
Therefore, we define a valid hour as those that contain at least a certain number of valid bins. In turn, a valid bin are those that contain at least one row of data from any sensor logged within that period. We divide an hour into N bins of size ``BIN_SIZE`` (in minutes) and we mark an hour as valid if contains at least ``MIN_BINS_PER_HOUR`` of valid bins (out of the total possible number of bins that can be captured in an hour i.e. out of 60min/``BIN_SIZE`` bins). Days with valid sensed hours less than ``MIN_VALID_HOURS`` will be excluded form the output of this file. See PHONE_VALID_SENSED_DAYS_ in ``config.yaml``.
Therefore, we define a valid hour as one that contains at least a minimum number of valid bins. In turn, a valid bin is one that contains at least one row of data from any sensor logged within that period. We divide an hour into N bins of size ``BIN_SIZE`` (in minutes) and we mark an hour as valid if it contains at least ``MIN_BINS_PER_HOUR`` valid bins (out of the total possible number of bins that can be captured in an hour based on their length, i.e. 60min/``BIN_SIZE`` bins). Days with fewer valid sensed hours than ``MIN_VALID_HOURS`` will be excluded from the output of this file. See PHONE_VALID_SENSED_DAYS_ in ``config.yaml``.
In RAPIDS, we use ``phone_sensed_bins`` (a list of all valid and invalid bins of all monitored days) to improve the estimation of features that are ratios over time periods like ``episodepersensedminutes`` of :ref:`Screen<screen-sensor-doc>` or for resampling data like fused location coordinates.
Note that RAPIDS *DOES NOT* filter your feature files automatically; you need to do this manually based on ``"data/interim/{pid}/phone_valid_sensed_days.csv"``.
You can get access to every phone's sensed bins matrix (days x bins) in ``data/interim/{pid}/phone_sensed_bins.csv``. As mentioned above, RAPIDS uses this file to compute ``phone_valid_sensed_days.csv``, ``episodepersensedminutes`` feature of :ref:`Screen<screen-sensor-doc>` and to resample fused location data if you configure Barnett's location features to use ``RESAMPLE_FUSED``.
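As a rough illustration of how ``BIN_SIZE``, ``MIN_BINS_PER_HOUR`` and ``MIN_VALID_HOURS`` interact (a sketch with made-up data and example parameter values, not RAPIDS code)::

    import numpy as np

    BIN_SIZE = 5             # minutes per bin (example value)
    MIN_BINS_PER_HOUR = 6    # valid bins needed for an hour to count as valid (example value)
    MIN_VALID_HOURS = 16     # valid hours needed for a day to count as valid (example value)

    bins_per_hour = 60 // BIN_SIZE
    rng = np.random.default_rng(0)
    rows_per_bin = rng.integers(0, 3, size=24 * bins_per_hour)  # fake count of sensed rows per bin

    valid_bins = rows_per_bin > 0                  # a bin is valid if any sensor logged at least one row
    hours = valid_bins.reshape(24, bins_per_hour)  # group the day's bins by hour
    valid_hours = int((hours.sum(axis=1) >= MIN_BINS_PER_HOUR).sum())
    day_is_valid = valid_hours >= MIN_VALID_HOURS
    print(valid_hours, day_is_valid)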
.. _individual-sensor-settings:
.. _sms-sensor-doc:
SMS
Messages (SMS)
"""""
See `SMS Config Code`_
See `Messages Config Code`_
**Available Epochs (day_segment) :** daily, morning, afternoon, evening, night
**Available Platforms:** Android
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv".``
| ``pid=config["PIDS"],``
| ``sms_type = config["SMS"]["TYPES"],``
| ``day_segment = config["SMS"]["DAY_SEGMENTS"]),``
**Rule Chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -134,13 +139,6 @@ See `Call Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/call_{call_type}_{segment}.csv",``
| ``pid=config["PIDS"],``
| ``call_type=config["CALLS"]["TYPES"],``
| ``segment = config["CALLS"]["DAY_SEGMENTS"]),``
**Rule Chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -217,12 +215,6 @@ See `Bluetooth Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/bluetooth_{segment}.csv",``
| ``pid=config["PIDS"],``
| ``segment = config["BLUETOOTH"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -254,6 +246,48 @@ countscansmostuniquedevice scans Number of scans of the most scanned
**Assumptions/Observations:** N/A
.. _wifi-sensor-doc:
WiFi
""""""""""
See `WiFi Config Code`_
**Available Epochs (day_segment) :** daily, morning, afternoon, evening, night
**Available Platforms:** Android and iOS
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
- Rule ``rules/preprocessing.snakefile/readable_datetime``
- Rule ``rules/features.snakefile/wifi_features``
.. _wifi-parameters:
**WiFi Rule Parameters (wifi_features):**
============ ===================
Name Description
============ ===================
day_segment The particular ``day_segment`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
features Features to be computed, see table below
============ ===================
.. _wifi-available-features:
**Available WiFi Features**
=========================== ========= =============
Name Units Description
=========================== ========= =============
countscans devices Number of scanned WiFi access points during a ``day_segment``, an access point can be detected multiple times over time and these appearances are counted separately
uniquedevices devices Number of unique access points during a ``day_segment`` as identified by their hardware address
countscansmostuniquedevice scans Number of scans of the most scanned access point during a ``day_segment`` across the whole monitoring period
=========================== ========= =============
**Assumptions/Observations:** N/A
.. _accelerometer-sensor-doc:
@ -266,12 +300,6 @@ See `Accelerometer Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/accelerometer_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),``
**Rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -325,12 +353,6 @@ See `Applications Foreground Config Code`_
**Available Platforms:** Android
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -392,12 +414,6 @@ See `Battery Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/battery_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["BATTERY"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -444,11 +460,6 @@ Activity Recognition
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/activity_recognition_{segment}.csv",pid=config["PIDS"],``
| ``segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -502,12 +513,6 @@ See `Light Config Code`_
**Available Platforms:** Android
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/light_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["LIGHT"]["DAY_SEGMENTS"]),``
**Rule Chain:**
- **Rule:** ``rules/preprocessing.snakefile/download_dataset`` - See the download_dataset_ rule.
@ -557,12 +562,6 @@ See `Location (Barnetts) Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/location_barnett_{segment}.csv",``
| ``pid=config["PIDS"],``
| ``segment = config["BARNETT_LOCATION"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -643,12 +642,6 @@ See `Screen Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/screen_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["SCREEN"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -701,12 +694,6 @@ See `Conversation Config Code`_
**Available Platforms:** Android and iOS
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/conversation_{day_segment}.csv",``
| ``pid = config["PIDS"],``
| ``day_segment = config["CONVERSATION"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -721,9 +708,9 @@ See `Conversation Config Code`_
Name Description
========================= ===================
day_segment The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
recordingMinutes The current default configuration is 1 min recording/3 min pause.
features_deltas Features to be computed, see table below
pausedMinutes The current default configuration is 1 min recording/3 min pause.
recordingMinutes Minutes the plugin was recording audio (default 1 min)
pausedMinutes Minutes the plugin was NOT recording audio (default 3 min)
features Features to be computed, see table below
========================= ===================
.. _conversation-available-features:
@ -733,30 +720,30 @@ pausedMinutes The current default configuration is 1 min recordin
========================= ================= =============
Name Units Description
========================= ================= =============
minutessilence minutes Total duration of all minutes silence.
minutesnoise minutes Total duration of all minutes noise.
minutesvoice minutes Total duration of all minutes voice.
minutesunknown minutes Total duration of all minutes unknown.
sumconversationduration minutes Total duration of all the conversation.
maxconversationduration minutes Longest duration of all the conversation.
minconversationduration minutes Shortest duration of all the conversation.
avgconversationduration minutes Average duration of all the conversation.
sdconversationduration minutes Standard Deviation duration of all the conversation.
timefirstconversation minutes Starting time of first conversation of the Day/Epoch.
timelastconversation minutes Starting time of last conversation of the Day/Epoch.
sumenergy L2-norm Total sum of all the energy.
avgenergy L2-norm Average of all the energy.
sdenergy L2-norm Standard Deviation of all the energy.
minenergy L2-norm Minimum of all the energy.
maxenergy L2-norm Maximum of all the energy.
silencesensedfraction minutes
noisesensedfraction minutes
voicesensedfraction minutes
unknownsensedfraction minutes
silenceexpectedfraction minutes
noiseexpectedfraction minutes
voiceexpectedfraction minutes
unknownexpectedfraction minutes
minutessilence minutes Minutes labeled as silence
minutesnoise minutes Minutes labeled as noise
minutesvoice minutes Minutes labeled as voice
minutesunknown minutes Minutes labeled as unknown
sumconversationduration minutes Total duration of all conversations
maxconversationduration minutes Longest duration of all conversations
minconversationduration minutes Shortest duration of all conversations
avgconversationduration minutes Average duration of all conversations
sdconversationduration minutes Standard Deviation of the duration of all conversations
timefirstconversation minutes Minutes since midnight when the first conversation for a day segment was detected
timelastconversation minutes Minutes since midnight when the last conversation for a day segment was detected
sumenergy L2-norm Sum of all energy values
avgenergy L2-norm Average of all energy values
sdenergy L2-norm Standard Deviation of all energy values
minenergy L2-norm Minimum of all energy values
maxenergy L2-norm Maximum of all energy values
silencesensedfraction Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
noisesensedfraction Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
voicesensedfraction Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
unknownsensedfraction Ratio between minutesunknown and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown)
silenceexpectedfraction Ratio between minutessilence and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / (recordingMinutes + pausedMinutes))
noiseexpectedfraction Ratio between minutesnoise and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / (recordingMinutes + pausedMinutes))
voiceexpectedfraction Ratio between minutesvoice and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / (recordingMinutes + pausedMinutes))
unknownexpectedfraction Ratio between minutesunknown and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / (recordingMinutes + pausedMinutes))
========================= ================= =============
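For concreteness, this is how the sensed and expected fraction features above relate to the minute counts (a sketch of the formulas in the table with made-up values, not RAPIDS code)::

    recording_minutes, paused_minutes = 1, 3  # plugin record/pause cycle (defaults above)
    minutes = {"silence": 300, "noise": 60, "voice": 90, "unknown": 30}  # made-up minute counts

    sensed_total = sum(minutes.values())
    sensed_fraction = {label: count / sensed_total for label, count in minutes.items()}

    # minutes the plugin should in theory have sensed in a day given its record/pause cycle
    expected_total = 1440 / (recording_minutes + paused_minutes)
    expected_fraction = {label: count / expected_total for label, count in minutes.items()}

    print(sensed_fraction["silence"], expected_fraction["silence"])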
**Assumptions/Observations:**
@ -774,13 +761,6 @@ See `Fitbit: Sleep Config Code`_
**Available Epochs (day_segment) :** daily
**Available Platforms:** Fitbit
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv",``
| ``pid = config["PIDS"],``
| ``day_segment = config["SLEEP"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
@ -818,25 +798,25 @@ countepisode episodes Number of sleep episodes for ``sleep_t
**Assumptions/Observations:**
The `fitbit_with_datetime` rule will extract Summary data (`fitbit_sleep_summary_with_datetime.csv`) Intraday data (`fitbit_sleep_intraday_with_datetime.csv`). There are two versions of Fitbit's sleep API(`version 1`_ and `version 1.2`_), and each provides raw sleep data with different formats.
Only features from summary data are available at the moment.
The differences between both API versions are:
The `fitbit_with_datetime` rule will extract Summary data (`fitbit_sleep_summary_with_datetime.csv`) and Intraday data (`fitbit_sleep_intraday_with_datetime.csv`). There are two versions of Fitbit's sleep API (`version 1`_ and `version 1.2`_), and each provides raw sleep data in a different format:
- Sleep level. In `v1`, it is an integer with three possible values {1, 2, 3} while in `v1.2` it is a string. We convert integer levels of `v1` to strings: "asleep", "restless" or "awake" respectively.
- Count summaries. For Summary data, `v1` contains "count_awake", "duration_awake", "count_awakenings", "count_restless", and "duration_restless" fields in the summary of each sleep record while `v1.2` does not.
- Types of sleep records. `v1.2` has two types of sleep records: "classic" and "stages". The "classic" type contains three sleep levels: "awake", "restless" and "asleep". The "stages" type contains four sleep levels {"wake", "deep", "light", "rem"}. Sleep records from `v1` will have the same sleep levels as `v1.2` classic types; therefore we set their type to "classic".
- Sleep level. In ``v1``, sleep level is an integer with three possible values (1, 2, 3) while in ``v1.2`` it is a string. We convert the integer levels to the strings "asleep", "restless" or "awake", respectively.
- Count summaries. For Summary data, ``v1`` contains "count_awake", "duration_awake", "count_awakenings", "count_restless", and "duration_restless" fields for every sleep record while ``v1.2`` does not.
- Types of sleep records. ``v1.2`` has two types of sleep records: "classic" and "stages". The "classic" type contains three sleep levels: "awake", "restless" and "asleep". The "stages" type contains four sleep levels: "wake", "deep", "light", and "rem". Sleep records from ``v1`` will have the same sleep levels as the ``v1.2`` "classic" type; therefore we set their type to "classic".
- Unified level of sleep. For intraday data, we unify sleep levels of each sleep record with a column named "unified_level". Based on `this Fitbit forum post`_, we merge levels into two categories (see the sketch after this list):
- For the "classic" type: unified_level is one of {0, 1} where 0 means awake and groups "awake" + "restless", while 1 means asleep and groups "asleep".
- For the "stages" type, unified_level is one of {0, 1} where 0 means awake and groups "wake" while 1 means asleep and groups "deep" + "light" + "rem".
- Short Data. In `v1.2`, records of type "stages" contain "shortData" in addition to "data". We merge "data" part and "shortData" part to extract intraday data.
- The "data" grouping displays the sleep stages and any wake periods > 3 minutes (180 seconds).
- The "shortData" grouping displays the short wake periods representing physiological awakenings that are <= 3 minutes (180 seconds).
- The following columns of Summary data are not computed by RAPIDS but taken directly from columns with a similar name provided by the API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`
- The following columns of Intraday data are not computed by RAPIDS but taken directly from columns with a similar name provided by the API: `original_level`, `is_main_sleep` and `type`. We compute `unified_level` as explained above.
- For the "classic" type unified_level is one of {0, 1} where 0 means awake and groups "awake" + "restless", while 1 means asleep and groups "asleep".
- For the "stages" type, unified_level is one of {0, 1} where 0 means awake and groups "wake" while 1 means asleep and groups "deep" + "light" + "rem".
- Short Data. In ``v1.2``, records of type "stages" contain "shortData" in addition to "data". We merge both to extract intraday data.
- "data" contains sleep stages and any wake periods > 3 minutes (180 seconds).
- "shortData" contains short wake periods representing physiological awakenings that are <= 3 minutes (180 seconds).
- The following columns of Summary data are not computed by RAPIDS but taken directly from columns with a similar name provided by Fitbit's API: `efficiency`, `minutes_after_wakeup`, `minutes_asleep`, `minutes_awake`, `minutes_to_fall_asleep`, `minutes_in_bed`, `is_main_sleep` and `type`
- The following columns of Intraday data are not computed by RAPIDS but taken directly from columns with a similar name provided by Fitbit's API: `original_level`, `is_main_sleep` and `type`. We compute `unified_level` as explained above.
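A small sketch of the level grouping just described (an illustration, not the RAPIDS implementation)::

    # 0 = awake, 1 = asleep, following the grouping described above
    UNIFIED_LEVEL = {
        "classic": {"awake": 0, "restless": 0, "asleep": 1},
        "stages": {"wake": 0, "deep": 1, "light": 1, "rem": 1},
    }

    def unified_level(sleep_type, original_level):
        return UNIFIED_LEVEL[sleep_type][original_level]

    print(unified_level("classic", "restless"), unified_level("stages", "rem"))  # 0 1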
Detailed sleep data is stored in Intraday data every 30 seconds (for "stages" type) or 60 seconds (for "classic" type) while a summary is stored in Summary data. For example:
These are examples of intraday and summary data:
- Intraday data
- Intraday data (at 30-second intervals for "stages" type or 60-second intervals for "classic" type)
========= ============== ============= ============= ====== =================== ========== =========== ========= ================= ========== ========== ============ =================
device_id original_level unified_level is_main_sleep type local_date_time local_date local_month local_day local_day_of_week local_time local_hour local_minute local_day_segment
@ -868,13 +848,6 @@ See `Fitbit: Heart Rate Config Code`_
**Available Platforms:** Fitbit
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/fitbit_heartrate_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["HEARTRATE"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -910,7 +883,7 @@ stdhr beats/mins The standard deviation of heart rate during
diffmaxmodehr beats/mins The difference between the maximum and mode heart rate during ``day_segment`` epoch.
diffminmodehr beats/mins The difference between the mode and minimum heart rate during ``day_segment`` epoch.
entropyhr nats Shannon's entropy measurement based on heart rate during ``day_segment`` epoch.
lengthZONE minutes Number of minutes the user's heartrate fell within each ``heartrate_zone`` during ``day_segment`` epoch.
minutesonZONE minutes Number of minutes the user's heartrate fell within each ``heartrate_zone`` during ``day_segment`` epoch.
================== =========== =============
**Assumptions/Observations:**
@ -930,12 +903,6 @@ See `Fitbit: Steps Config Code`_
**Available Platforms:** Fitbit
**Snakefile entry to compute these features:**
| ``expand("data/processed/{pid}/fitbit_step_{day_segment}.csv",``
| ``pid=config["PIDS"],``
| ``day_segment = config["STEP"]["DAY_SEGMENTS"]),``
**Snakemake rule chain:**
- Rule ``rules/preprocessing.snakefile/download_dataset``
@ -985,7 +952,7 @@ Active and sedentary bouts. If the step count per minute is smaller than ``THRES
.. -------------------------Links ------------------------------------ ..
.. _SENSORS: https://github.com/carissalow/rapids/blob/f22d1834ee24ab3bcbf051bc3cc663903d822084/config.yaml#L2
.. _TABLES_FOR_SENSED_BINS: https://github.com/carissalow/rapids/blob/f22d1834ee24ab3bcbf051bc3cc663903d822084/config.yaml#L2
.. _`SMS Config Code`: https://github.com/carissalow/rapids/blob/f22d1834ee24ab3bcbf051bc3cc663903d822084/config.yaml#L38
.. _AWARE: https://awareframework.com/what-is-aware/
.. _`List of Timezones`: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones


@ -154,14 +154,9 @@ Once RAPIDS is installed, follow these steps to start processing mobile data.
Participant01
2020/02/01,2020/03/03
#. Configure the sensors to process:
#. Choose what features to extract:
- See :ref:`Minimal Working Example<minimal-working-example>`. The variable ``SENSORS`` in the ``config.yaml`` file_ should match existent sensor tables in your Aware database (See :ref:`rapids-structure` for more information). Each sensor in this list will be processed in RAPIDS.
.. note::
It is beneficial to list all collected sensors even if you don't plan to include them in a model later on in the pipeline. This is because we use all data available to estimate whether the phone was sensing data or not (i.e. to know if Aware crashed or the battery died). See :ref:`PHONE_VALID_SENSED_DAYS<phone-valid-sensed-days>` for more information.
- See :ref:`Minimal Working Example<minimal-working-example>`.
#. Execute RAPIDS


@ -13,6 +13,7 @@ Available features:
- :ref:`applications-foreground-sensor-doc`
- :ref:`battery-sensor-doc`
- :ref:`bluetooth-sensor-doc`
- :ref:`wifi-sensor-doc`
- :ref:`call-sensor-doc`
- :ref:`activity-recognition-sensor-doc`
- :ref:`light-doc`


@ -3,7 +3,7 @@
Minimal Working Example
=======================
The following is a quick guide for creating and running a simple pipeline to extract Call metrics for daily and night epochs of one participant monitored on the US East coast.
This is a quick guide for creating and running a simple pipeline to extract call features for daily and night epochs of one participant monitored on the US East coast.
#. Make sure your database connection credentials in ``.env`` are correct. See step 1 of :ref:`Usage Section <db-configuration>`.
@ -11,33 +11,9 @@ The following is a quick guide for creating and running a simple pipeline to ext
#. Make sure your Conda (python) environment is active. See step 6 of :ref:`install-page`.
#. Replace the contents of the ``Snakefile`` with the following snippet
::
configfile: "config.yaml"
include: "rules/renv.snakefile"
include: "rules/preprocessing.snakefile"
include: "rules/features.snakefile"
include: "rules/reports.snakefile"
rule all:
input:
expand("data/processed/{pid}/call_{call_type}_{day_segment}.csv",
pid=config["PIDS"],
call_type=config["CALLS"]["TYPES"],
day_segment = config["CALLS"]["DAY_SEGMENTS"]),
#. Modify the following settings in the ``config.yaml`` file with the values shown below (leave all other settings as they are)
::
SENSORS: [calls]
FITBIT_TABLE: []
FITBIT_SENSORS: []
PIDS: [p01]
DAY_SEGMENTS: &day_segments
@ -47,7 +23,11 @@ The following is a quick guide for creating and running a simple pipeline to ext
America/New_York
DATABASE_GROUP: &database_group
MY_GROUP
MY_GROUP (change this if you added your DB credentials to .env with a different label)
CALLS:
COMPUTE: True
DB_TABLE: calls (only change DB_TABLE if your database calls table has a different name)
For more information on the ``calls`` sensor see :ref:`call-sensor-doc`


@ -3,11 +3,25 @@
RAPIDS Structure
=================
.. _the-config-file:
The ``config.yaml`` File
------------------------
RAPIDS configuration settings are defined in ``config.yaml`` (See `config.yaml`_). This is the only file that you need to understand in order to compute the features that RAPIDS ships with.
It has global settings like ``TABLES_FOR_SENSED_BINS``, ``PIDS``, ``DAY_SEGMENTS``, among others (see :ref:`global-sensor-doc` for more information), as well as per-sensor settings, for example, for the :ref:`sms-sensor-doc`:
| ``MESSAGES:``
| ``COMPUTE: True``
| ``DB_TABLE: messages``
| ``...``
.. _the-snakefile-file:
The ``Snakefile`` File
----------------------
The ``Snakefile`` file (see the actual `Snakefile`_) pulls the entire system together and can be thought of as the menu of RAPIDS allowing the user to define the sensor data that is desired. The first line in this file identifies the configuration file. Next are a list of included files that define the rules used to pull, clean, process, analyze and report on the data. Next is the ``all`` rule that list the sensor data (menu items) that would be processed by the pipeline.
The ``Snakefile`` file (see the actual `Snakefile`_) pulls the entire system together. The first line in this file identifies the configuration file. Next is a list of include directives that import the rules used to pull, clean, process, analyze and report data. Finally, the ``all`` rule lists the files that need to be computed (raw files, intermediate files, feature files, reports, etc.).
.. _includes-section:
@ -15,22 +29,20 @@ Includes
"""""""""
There are six included files in the ``Snakefile`` file.
- ``renv.snakefile`` - This file defines the rules to manager the R packages that are used by RAPIDS. (See `renv`_)
- ``preprocessing.snakefile`` - This file contains the rules that are used to preprocess the data such as downloading, cleaning and formatting. (See `preprocessing`_)
- ``features.snakefile`` - This file contains the rules that used for behavioral feature extraction. (See `features`_)
- ``models.snakefile`` - This file contains the rules that are used to build models from features that have been extreacted from the sensor data. (See `models`_)
- ``reports.snakefile`` - The file contains the rules that are used to produce the reports based on the models produced. (See `reports`_)
- ``mystudy.snakefile`` - The file contains the rules that you add that are specifically tailored to your project/study. (See `mystudy`_)
.. - ``analysis.snakefile`` - The rules that define how the data is analyzed is outlined in this file. (see `analysis <https://github.com/carissalow/rapids/blob/master/rules/analysis.snakefile>`_)
- ``renv.snakefile`` - Rules to create, back up and restore the R renv virtual environment for RAPIDS. (See `renv`_)
- ``preprocessing.snakefile`` - Rules that are used to preprocess the data such as downloading, cleaning and formatting. (See `preprocessing`_)
- ``features.snakefile`` - Rules used for behavioral feature extraction. (See `features`_)
- ``models.snakefile`` - Rules that are used to build models from features that have been extracted from the sensor data. (See `models`_)
- ``reports.snakefile`` - Rules that are used to produce reports and visualizations. (See `reports`_)
- ``mystudy.snakefile`` - Example file that contains rules specific to your project/study. (See `mystudy`_)
Includes are relative to the directory of the Snakefile in which they occur. For example, if above Snakefile resides in the directory ``my/dir``, then Snakemake will search for the include file at ``my/dir/path/to/other/snakefile``, regardless of the working directory.
Includes are relative to the root directory.
.. _rule-all-section:
``Rule all:``
"""""""""""""
In RAPIDS the ``all`` rule indirectly specifies the features/sensors that are desired by listing the output files of the pipeline using the ``expand`` directive. The ``expand`` function allows the combination of different variables. Consider the following::
In RAPIDS the ``all`` rule lists the output files we expect the pipeline to compute using the ``expand`` directive. The ``expand`` function allows us to generate a list of file paths that have a common structure except for PIDS or other parameters. Consider the following::
expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SENSORS"]),
@ -38,59 +50,33 @@ If ``pids = ['p01','p02']`` and ``sensor = ['sms', 'calls']`` then the above dir
["data/raw/p01/sms_raw.csv", "data/raw/p01/calls_raw.csv", "data/raw/p02/sms_raw.csv", "data/raw/p02/calls_raw.csv"]
Thus, this allows the user of RAPIDS to define all of the desired output files without having to manually list all for the participants of the research. The way Snakemake works is that it looks for the rule that produces the desired output files and then executes that rule. For more information on ``expand`` see `The Expand Function`_
Thus, this allows us to define all the desired output files without having to manually list each path for every participant and every sensor. The way Snakemake works is that it looks for the rule that produces the desired output files and then executes that rule. For more information on ``expand`` see `The Expand Function`_
.. _the-env-file:
The ``.env`` File
-------------------
The database credentials for the database server are placed in the ``.env`` file (remember step 9 on the :ref:`install-page` page). The format of the configuration is shown below::
Your database credentials are stored in the ``.env`` file (See :ref:`install-page`)::
[MY_GROUP_NAME]
user=MyUSER
password=MyPassword
host=MyIP
host=MyIP/DOMAIN
port=3306
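The group name in square brackets (``MY_GROUP_NAME`` above) is the label you point to with ``DATABASE_GROUP`` in ``config.yaml``, which tells the pipeline which set of credentials to use.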
.. _the-config-file:
The ``config.yaml`` File
------------------------
The configurations for the pipeline are defined in ``config.yaml`` (See `config.yaml`_). It contains global settings and variables that are used by the rules. Some of the global variables defined in the ``config.yaml`` file are briefly explained below and illustrated in the sketch after this list:
- ``SENSORS`` - This is a global variable that contains a list of the sensor/feature tables in the database that will be analyzed.
- ``PIDS`` - This is the list of the participant IDs to include in the analysis. For each participant, create a file named ``pXXX`` containing their device_id in the ``data/external/`` directory. (Remember step 8 on the :ref:`install-page` page)
- ``DAY_SEGMENTS`` - A variable used to list all of the common day segments.
- ``TIMEZONE`` - Time variable. Use timezone names from the `List of Timezone`_ and double-check your choice; for example, ``EST`` is not the same as US Eastern Time.
- ``DATABASE_GROUP`` - Label for the database credentials group. (See :ref:`Configure the database connection <db-configuration>`.)
- ``DOWNLOAD_DATASET`` - Variable used to store the name of the dataset that will be downloaded for analysis.
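A minimal sketch of these global settings is shown below; the values are only illustrative and must be adapted to your own study and database::

    SENSORS: [messages, calls]
    PIDS: [p01, p02]
    DAY_SEGMENTS: &day_segments [daily, morning, afternoon, evening, night]
    TIMEZONE: America/New_York
    DATABASE_GROUP: MY_GROUP_NAME

Here ``&day_segments`` defines a YAML anchor that the sensor sections reference with ``*day_segments`` (as in the SMS example below).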
There are a number of other settings that are specific to the sensor/feature that will be pulled and analyzed by the pipeline. An example of the configuration settings for the :ref:`sms-sensor-doc` data is shown below::
SMS:
TYPES : [received, sent]
FEATURES:
received: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
sent: [count, distinctcontacts, timefirstsms, timelastsms, countmostfrequentcontact]
DAY_SEGMENTS: *day_segments
The ``TYPES`` setting defines the types of SMS data that will be analyzed. ``FEATURES`` defines the features to compute for each of those types. Finally, ``DAY_SEGMENTS`` lists the day segments (times of day) for which features are computed.
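These settings feed the ``expand`` calls in the ``all`` rule. A sketch of how the SMS output files could be requested is shown below; the exact call in your ``Snakefile`` may differ::

    expand("data/processed/{pid}/sms_{sms_type}_{day_segment}.csv",
           pid=config["PIDS"],
           sms_type=config["SMS"]["TYPES"],
           day_segment=config["SMS"]["DAY_SEGMENTS"])

With two participants, two SMS types and one day segment this yields four output files, and Snakemake then looks for the rule whose ``output`` pattern matches each of them.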
.. _rules-syntax:
The ``Rules`` Directory
------------------------
The ``rules`` directory contains the ``snakefiles`` that were included in the ``Snakefile`` file. A short description of these files is given in the :ref:`includes-section` section.
The ``rules`` directory contains the ``snakefiles`` that were included in the main ``Snakefile`` file. A short description of these files is given in the :ref:`includes-section` section.
Rules
""""""
A Snakemake workflow is defined by specifying rules in a ``Snakefile`` (See the features_ snakefile as an actual example). Rules decompose the workflow into small steps (e.g., the application of a single tool) by specifying how to create sets of output files from sets of input files. Snakemake automatically determines the dependencies between the rules by matching file names. Thus, a rule can consist of a name, input files, output files, and a command to generate the output from the input. The following is the basic structure of a Snakemake rule::
A Snakemake workflow is defined by rules (See the features_ snakefile as an actual example). Rules decompose the workflow into small steps by specifying what output files should be created by running a script on a set of input files. Snakemake automatically determines the dependencies between the rules by matching file names. Thus, a rule can consist of a name, input files, output files, and a command to generate the output from the input. The following is the basic structure of a Snakemake rule::
rule NAME:
input: "path/to/inputfile", "path/to/other/inputfile"
@ -113,17 +99,17 @@ A sample rule from the RAPIDS source code is shown below::
"../src/features/sms_features.R"
The ``rule`` directive specifies the name of the rule that is being defined. ``params`` defines the additional parameters that need to be set for the rule. In the example immediately above, the parameters are passed to the script defined in the ``script`` directive of the rule. Instead of ``script``, a ``shell`` command can also be used by replacing the ``script`` directive of the rule with lines similar to the following::
The ``rule`` directive specifies the name of the rule that is being defined. ``params`` defines additional parameters for the rule's script. In the example above, the parameters are passed to the ``sms_features.R`` script as a dictionary. Instead of ``script``, a ``shell`` command can also be used by replacing the ``script`` directive of the rule with::
shell: "somecommand {input} {output}"
Here input and output (and in general any list or tuple) automatically evaluate to a space-separated list of files (i.e. ``path/to/inputfile path/to/other/inputfile``). It should be noted that rules can be defined without input and output, as seen in ``renv.snakefile``. For more information see `Rules documentation`_ and for an actual example see the `renv`_ snakefile.
It should be noted that rules can be defined without input and output, as seen in ``renv.snakefile``. For more information see `Rules documentation`_ and for an actual example see the `renv`_ snakefile.
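For reference, a minimal self-contained rule is sketched below; the paths, parameter and script name are illustrative rather than actual RAPIDS code::

    rule example_features:
        input:
            "data/raw/p01/example_with_datetime.csv"
        params:
            day_segment = "daily"
        output:
            "data/processed/p01/example_daily.csv"
        script:
            "../src/features/example_features.R"

Inside an R script, the values declared under ``params`` are available through the ``snakemake`` object, for example ``snakemake@params[["day_segment"]]`` (Python scripts use ``snakemake.params`` instead).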
.. _wildcards:
Wildcards
""""""""""
There are times when it would be useful to generalize a rule to be applicable to a number of datasets, for example. For this purpose, wildcards can be used. Consider the sample code from above, repeated below for quick reference::
There are times when the same rule should be applied to different participants and day segments. For this we use wildcards ``{my_wildcard}``. All wildcards are inferred from the files listed in the ``all`` rule of the ``Snakefile`` file and therefore from the output of any rule::
rule sms_features:
input:
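For example, if the ``all`` rule requests ``data/processed/p01/sms_received_daily.csv`` (an illustrative path), Snakemake matches it against the output pattern ``data/processed/{pid}/sms_{sms_type}_{day_segment}.csv`` and runs this rule with ``pid=p01``, ``sms_type=received`` and ``day_segment=daily``; the same wildcard values are then substituted into the rule's ``input`` and ``params``.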
@ -147,10 +133,10 @@ The ``data`` Directory
This directory contains the data files for the project, organized into the following subdirectories:
- ``external`` - This directory stores the participant `pxxx` files that contains the device_id and the type of device as well as data from third party sources. (Remember step 8 on :ref:`install-page` page)
- ``raw`` - This directory contains the original, immutable data dump from the sensor database.
- ``interim`` - This directory would contain intermediate data that has been transformed but has not been completely analyzed.
- ``processed`` - This directory contains the final canonical data sets for modeling.
- ``external`` - This directory stores the participant `pxxx` files as well as data from third party sources (see :ref:`install-page` page).
- ``raw`` - This directory contains the original, immutable data dump from your database.
- ``interim`` - This directory contains intermediate data that has been transformed but does not represent features.
- ``processed`` - This directory contains all behavioral features.
.. _the-src-directory:
@ -158,12 +144,12 @@ This directory contains the data files for the project. These directories are as
The ``src`` Directory
----------------------
The ``src`` directory holds all of the scripts used by the pipeline for data manipulation. These scripts can be in any programming language including but not limited to Python_, R_ and Julia_. This directory is organized into the following directories:
The ``src`` directory holds all the scripts used by the pipeline for data manipulation. These scripts can be in any programming language including but not limited to Python_, R_ and Julia_. This directory is organized into the following directories:
- ``data`` - This directory contains scripts that are used to download and preprocess raw data that will be used in analysis. See `data directory`_
- ``features`` - This directory contains scripts to extract behavioral features. See `features directory`_
- ``models`` - This directory contains the model scripts for building and training models. See `models directory`_
- ``visualization`` - This directory contains the scripts to create plots and reports that visualize the results of the models. See `visualization directory`_
- ``models`` - This directory contains the scripts for building and training models. See `models directory`_
- ``visualization`` - This directory contains the scripts to create plots and reports. See `visualization directory`_
.. _the-report-directory:
@ -171,7 +157,7 @@ The ``src`` directory holds all of the scripts used by the pipeline for data man
The ``reports`` Directory
--------------------------
This contains the reports of the results of the analysis done by the pipeline.
This directory contains reports and visualizations.
.. _Python: https://www.python.org/
.. _Julia: https://julialang.org/
View File
@ -2,55 +2,57 @@ def optional_ar_input(wildcards):
with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file:
external_file_content = external_file.readlines()
platform = external_file_content[1].strip()
if platform == "android":
return ["data/raw/{pid}/plugin_google_activity_recognition_with_datetime_unified.csv",
"data/processed/{pid}/plugin_google_activity_recognition_deltas.csv"]
if platform == "android":
return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv",
"data/processed/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_deltas.csv"]
elif platform == "ios":
return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv",
"data/processed/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_deltas.csv"]
else:
return ["data/raw/{pid}/plugin_ios_activity_recognition_with_datetime_unified.csv",
"data/processed/{pid}/plugin_ios_activity_recognition_deltas.csv"]
return []
def optional_conversation_input(wildcards):
with open("data/external/"+wildcards.pid, encoding="ISO-8859-1") as external_file:
external_file_content = external_file.readlines()
platform = external_file_content[1].strip()
if platform == "android":
return ["data/raw/{pid}/plugin_studentlife_audio_android_with_datetime.csv"]
return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime.csv"]
else:
return ["data/raw/{pid}/plugin_studentlife_audio_with_datetime.csv"]
return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime.csv"]
def optional_location_input(wildcards):
if config["BARNETT_LOCATION"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
return rules.resample_fused_location.output
return expand("data/raw/{{pid}}/{sensor}_resampled.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"])
else:
return "data/raw/{pid}/locations_with_datetime.csv",
return expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BARNETT_LOCATION"]["DB_TABLE"])
rule sms_features:
rule messages_features:
input:
"data/raw/{pid}/messages_with_datetime.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])
params:
sms_type = "{sms_type}",
messages_type = "{messages_type}",
day_segment = "{day_segment}",
features = lambda wildcards: config["SMS"]["FEATURES"][wildcards.sms_type]
features = lambda wildcards: config["MESSAGES"]["FEATURES"][wildcards.messages_type]
output:
"data/processed/{pid}/sms_{sms_type}_{day_segment}.csv"
"data/processed/{pid}/messages_{messages_type}_{day_segment}.csv"
script:
"../src/features/sms_features.R"
"../src/features/messages_features.R"
rule call_features:
input:
"data/raw/{pid}/calls_with_datetime_unified.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])
params:
call_type = "{call_type}",
day_segment = "{day_segment}",
features = lambda wildcards: config["CALLS"]["FEATURES"][wildcards.call_type]
output:
"data/processed/{pid}/call_{call_type}_{day_segment}.csv"
"data/processed/{pid}/calls_{call_type}_{day_segment}.csv"
script:
"../src/features/call_features.R"
rule battery_deltas:
input:
"data/raw/{pid}/battery_with_datetime_unified.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["BATTERY"]["DB_TABLE"])
output:
"data/processed/{pid}/battery_deltas.csv"
script:
@ -58,7 +60,7 @@ rule battery_deltas:
rule screen_deltas:
input:
screen = "data/raw/{pid}/screen_with_datetime.csv",
screen = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SCREEN"]["DB_TABLE"]),
participant_info = "data/external/{pid}"
output:
"data/processed/{pid}/screen_deltas.csv"
@ -67,17 +69,17 @@ rule screen_deltas:
rule google_activity_recognition_deltas:
input:
"data/raw/{pid}/plugin_google_activity_recognition_with_datetime_unified.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
output:
"data/processed/{pid}/plugin_google_activity_recognition_deltas.csv"
expand("data/processed/{{pid}}/{sensor}_deltas.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
script:
"../src/features/activity_recognition_deltas.R"
rule ios_activity_recognition_deltas:
input:
"data/raw/{pid}/plugin_ios_activity_recognition_with_datetime_unified.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
output:
"data/processed/{pid}/plugin_ios_activity_recognition_deltas.csv"
expand("data/processed/{{pid}}/{sensor}_deltas.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
script:
"../src/features/activity_recognition_deltas.R"
@ -98,7 +100,7 @@ rule location_barnett_features:
rule bluetooth_features:
input:
"data/raw/{pid}/bluetooth_with_datetime.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])
params:
day_segment = "{day_segment}",
features = config["BLUETOOTH"]["FEATURES"]
@ -146,7 +148,7 @@ rule screen_features:
rule light_features:
input:
"data/raw/{pid}/light_with_datetime.csv",
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
params:
day_segment = "{day_segment}",
features = config["LIGHT"]["FEATURES"],
@ -170,7 +172,7 @@ rule conversation_features:
rule accelerometer_features:
input:
"data/raw/{pid}/accelerometer_with_datetime.csv",
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"]),
params:
day_segment = "{day_segment}",
magnitude = config["ACCELEROMETER"]["FEATURES"]["MAGNITUDE"],
@ -184,7 +186,7 @@ rule accelerometer_features:
rule applications_foreground_features:
input:
"data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
expand("data/interim/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])
params:
day_segment = "{day_segment}",
single_categories = config["APPLICATIONS_FOREGROUND"]["SINGLE_CATEGORIES"],
@ -200,7 +202,7 @@ rule applications_foreground_features:
rule wifi_features:
input:
"data/raw/{pid}/wifi_with_datetime.csv"
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"])
params:
day_segment = "{day_segment}",
features = config["WIFI"]["FEATURES"]
@ -224,7 +226,7 @@ rule fitbit_heartrate_features:
rule fitbit_step_features:
input:
step_data = "data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv"
step_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
params:
day_segment = "{day_segment}",
features_all_steps = config["STEP"]["FEATURES"]["ALL_STEPS"],
View File
@ -19,6 +19,9 @@ rule download_dataset:
script:
"../src/data/download_dataset.R"
PHONE_SENSORS = []
PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"], config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]])
rule readable_datetime:
input:
sensor_input = rules.download_dataset.output
@ -26,7 +29,7 @@ rule readable_datetime:
timezones = None,
fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"]
wildcard_constraints:
sensor = '(' + '|'.join([re.escape(x) for x in config["SENSORS"]]) + ')' # only process smartphone sensors, not fitbit
sensor = '.*(' + '|'.join([re.escape(x) for x in PHONE_SENSORS]) + ').*' # only process smartphone sensors, not fitbit
output:
"data/raw/{pid}/{sensor}_with_datetime.csv"
script:
@ -34,7 +37,7 @@ rule readable_datetime:
rule phone_valid_sensed_days:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"])
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
min_valid_hours = config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS"],
@ -46,7 +49,7 @@ rule phone_valid_sensed_days:
rule phone_sensed_bins:
input:
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["SENSORS"])
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["TABLES_FOR_SENSED_BINS"])
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"]
output:
@ -67,7 +70,7 @@ rule unify_ios_android:
rule resample_fused_location:
input:
locations = "data/raw/{pid}/locations_raw.csv",
locations = "data/raw/{pid}/{sensor}_raw.csv",
phone_sensed_bins = rules.phone_sensed_bins.output
params:
bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"],
@ -75,32 +78,54 @@ rule resample_fused_location:
consecutive_threshold = config["RESAMPLE_FUSED_LOCATION"]["CONSECUTIVE_THRESHOLD"],
time_since_valid_location = config["RESAMPLE_FUSED_LOCATION"]["TIME_SINCE_VALID_LOCATION"]
output:
"data/raw/{pid}/locations_resampled.csv"
"data/raw/{pid}/{sensor}_resampled.csv"
script:
"../src/data/resample_fused_location.R"
rule application_genres:
input:
"data/raw/{pid}/applications_foreground_with_datetime.csv"
"data/raw/{pid}/{sensor}_with_datetime.csv"
params:
catalogue_source = config["APPLICATION_GENRES"]["CATALOGUE_SOURCE"],
catalogue_file = config["APPLICATION_GENRES"]["CATALOGUE_FILE"],
update_catalogue_file = config["APPLICATION_GENRES"]["UPDATE_CATALOGUE_FILE"],
scrape_missing_genres = config["APPLICATION_GENRES"]["SCRAPE_MISSING_GENRES"]
output:
"data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv"
"data/interim/{pid}/{sensor}_with_datetime_with_genre.csv"
script:
"../src/data/application_genres.R"
rule fitbit_with_datetime:
rule fitbit_heartrate_with_datetime:
input:
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["FITBIT_TABLE"])
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["HEARTRATE"]["DB_TABLE"])
params:
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
fitbit_sensor = "{fitbit_sensor}"
fitbit_sensor = "heartrate"
output:
summary_data = "data/raw/{pid}/fitbit_{fitbit_sensor}_summary_with_datetime.csv",
intraday_data = "data/raw/{pid}/fitbit_{fitbit_sensor}_intraday_with_datetime.csv"
summary_data = "data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv",
intraday_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv"
script:
"../src/data/fitbit_readable_datetime.py"
rule fitbit_step_with_datetime:
input:
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["STEP"]["DB_TABLE"])
params:
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
fitbit_sensor = "steps"
output:
intraday_data = "data/raw/{pid}/fitbit_step_intraday_with_datetime.csv"
script:
"../src/data/fitbit_readable_datetime.py"
rule fitbit_sleep_with_datetime:
input:
expand("data/raw/{{pid}}/{fitbit_table}_raw.csv", fitbit_table=config["SLEEP"]["DB_TABLE"])
params:
local_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
fitbit_sensor = "sleep"
output:
summary_data = "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv",
intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
script:
"../src/data/fitbit_readable_datetime.py"
View File
@ -48,9 +48,12 @@ rule battery_consumption_rates_barchart:
script:
"../src/visualization/battery_consumption_rates_barchart.py"
PHONE_SENSORS = []
PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["BARNETT_LOCATION"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"],config["WIFI"]["DB_TABLE"]])
rule compliance_report:
input:
sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=config["SENSORS"]),
sensor_heatmaps = expand("reports/figures/{{pid}}/{sensor}_heatmap_rows.html", sensor=PHONE_SENSORS),
compliance_heatmap = rules.compliance_heatmap.output
output:
"reports/compliance/{pid}/compliance_report.html",
View File
@ -45,8 +45,9 @@ elif sensor == "steps":
elif sensor == "calories":
summary_data, intraday_data = parseCaloriesData(data, HOUR2EPOCH)
else:
raise ValueError("Please check the FITBIT_SENSORS list in config.yaml file.")
raise ValueError("We only support heartrate, sleep, step, or calories sensors on Fitbit devices.")
# Summary data will be empty for steps and calories as it is not provided by Fitbit's API
summary_data.to_csv(snakemake.output["summary_data"], index=False)
# Summary data does not exist for steps and calories as it is not provided by Fitbit's API
if sensor == "heartrate" or sensor == "sleep":
summary_data.to_csv(snakemake.output["summary_data"], index=False)
intraday_data.to_csv(snakemake.output["intraday_data"], index=False)
View File
@ -1,5 +1,3 @@
library('tidyr')
filter_by_day_segment <- function(data, day_segment) {
if(day_segment %in% c("morning", "afternoon", "evening", "night"))
data <- data %>% filter(local_day_segment == day_segment)
@ -31,7 +29,7 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
for(feature_name in features_to_compute){
if(feature_name == "countmostfrequentcontact"){
# Get the number of messages for the most frequent contact throughout the study
# Get the number of messages for the most frequent contact throughout the study
mostfrequentcontact <- sms %>%
group_by(trace) %>%
mutate(N=n()) %>%
@ -45,17 +43,6 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
summarise(!!paste("sms", sms_type, day_segment, feature_name, sep = "_") := n()) %>%
replace(is.na(.), 0)
features <- merge(features, feature, by="local_date", all = TRUE)
# # Get the number of messages for the most frequent contact throughout the study
# feature <- sms %>% group_by(trace) %>%
# mutate(N=n()) %>%
# ungroup() %>%
# filter(N == max(N)) %>%
# head(1) %>% # if there are multiple contacts with the same amount of messages pick the first one only
# group_by(local_date) %>%
# summarise(!!paste("sms", sms_type, day_segment, feature_name, sep = "_") := N) %>%
# replace(is.na(.), 0)
# features <- merge(features, feature, by="local_date", all = TRUE)
} else {
feature <- sms %>%
group_by(local_date)
@ -69,6 +56,6 @@ base_sms_features <- function(sms, sms_type, day_segment, requested_features){
features <- merge(features, feature, by="local_date", all = TRUE)
}
}
features <- features %>% mutate_at(vars(contains("countmostfrequentcontact")), list( ~ replace_na(., 0)))
return(features)
}
View File
@ -2,13 +2,13 @@
# swap base_sms_features(...) for your own function
source("renv/activate.R")
source("src/features/sms/sms_base.R")
source("src/features/messages/messages_base.R")
library(dplyr, warn.conflicts = FALSE)
sms <- read.csv(snakemake@input[[1]])
day_segment <- snakemake@params[["day_segment"]]
requested_features <- snakemake@params[["features"]]
sms_type <- snakemake@params[["sms_type"]]
sms_type <- snakemake@params[["messages_type"]]
features <- data.frame(local_date = character(), stringsAsFactors = FALSE)
# Compute base SMS features