Update file names

2020-10-19 15:07:12 -04:00 · 2020-10-19 15:07:12 -04:00 · 24bf62a7ab
parent d32771fd9e
commit 24bf62a7ab
68 changed files with 495 additions and 669 deletions
--- a/220
+++ b/220
@ -13,17 +13,11 @@ if len(config["PIDS"]) == 0:
    raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")

 if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"] or config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: # valid sensed bins is necessary for sensed days, so we add these files anyways if sensed days are requested
-    if len(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]) == 0:
-            raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one table to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml")
+    if len(config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]) == 0:
+            raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one PHONE_SENSOR to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml")

-    pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-    pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-    tables_android = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    tables_ios = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    for pids,table in zip([pids_android, pids_ios], [tables_android, tables_ios]):
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
+    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
    files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
    files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_timestamps.csv", pid=config["PIDS"]))

@ -33,106 +27,100 @@ if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
                                min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
                                min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))

-for provider in config["MESSAGES"]["PROVIDERS"].keys():
-    if config["MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["MESSAGES"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="MESSAGES".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="MESSAGES".lower()))
+for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
+    if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_messages_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_messages_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_messages_features/phone_messages_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_MESSAGES"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_messages.csv", pid=config["PIDS"]))

-for provider in config["CALLS"]["PROVIDERS"].keys():
-    if config["CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["CALLS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CALLS".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CALLS".lower()))
+for provider in config["PHONE_CALLS"]["PROVIDERS"].keys(): # for every enabled provider, request the raw, with_datetime, unified, per-provider and processed feature CSVs
+    if config["PHONE_CALLS"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_raw.csv", pid=config["PIDS"])) # file names are fixed now, so no {sensor} wildcard (and no sensor= argument) is needed
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_calls_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_calls_features/phone_calls_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CALLS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_calls.csv", pid=config["PIDS"]))

-for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
-    if config["BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["BLUETOOTH"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
+for provider in config["PHONE_BLUETOOTH"]["PROVIDERS"].keys():
+    if config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_bluetooth_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_bluetooth.csv", pid=config["PIDS"]))

-for provider in config["ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
-    if config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
-        pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-        pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-        
-        for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-        
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACTIVITY_RECOGNITION".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACTIVITY_RECOGNITION".lower()))
+for provider in config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
+    if config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_activity_recognition.csv", pid=config["PIDS"]))


-for provider in config["BATTERY"]["PROVIDERS"].keys():
-    if config["BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["BATTERY"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BATTERY".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BATTERY".lower()))
+for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
+    if config["PHONE_BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_battery_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_battery_features/phone_battery_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_BATTERY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_battery.csv", pid=config["PIDS"]))


-for provider in config["SCREEN"]["PROVIDERS"].keys():
-    if config["SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
-        if config["SCREEN"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
+for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
+    if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
+        if "PHONE_SCREEN" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
            files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
        else:
-            raise ValueError("Error: Add your screen table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=config["PIDS"], sensor=config["SCREEN"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="SCREEN".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="SCREEN".lower()))
+            raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_screen_features/phone_screen_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_SCREEN"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_screen.csv", pid=config["PIDS"]))

-for provider in config["LIGHT"]["PROVIDERS"].keys():
-    if config["LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
+for provider in config["PHONE_LIGHT"]["PROVIDERS"].keys(): # for every enabled provider, request the raw, with_datetime, per-provider and processed feature CSVs
+    if config["PHONE_LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_light_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_light_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_light_features/phone_light_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_light.csv", pid=config["PIDS"]))

-for provider in config["ACCELEROMETER"]["PROVIDERS"].keys():
-    if config["ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACCELEROMETER".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACCELEROMETER".lower()))
+for provider in config["PHONE_ACCELEROMETER"]["PROVIDERS"].keys():
+    if config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_accelerometer_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_accelerometer.csv", pid=config["PIDS"]))

-for provider in config["APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
-    if config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_with_genre.csv", pid=config["PIDS"], sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="APPLICATIONS_FOREGROUND".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="APPLICATIONS_FOREGROUND".lower()))
+for provider in config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"].keys():
+    if config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_applications_foreground.csv", pid=config["PIDS"]))

-for provider in config["WIFI"]["PROVIDERS"].keys():
-    if config["WIFI"]["PROVIDERS"][provider]["COMPUTE"]:
-        if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-        if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor_key}_with_datetime_visibleandconnected.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["WIFI"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="WIFI".lower()))
-            files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="WIFI".lower()))
+for provider in config["PHONE_WIFI_VISIBLE"]["PROVIDERS"].keys():
+    if config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_visible_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_visible.csv", pid=config["PIDS"]))
+
+for provider in config["PHONE_WIFI_CONNECTED"]["PROVIDERS"].keys():
+    if config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_wifi_connected_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_wifi_connected.csv", pid=config["PIDS"]))

 if config["HEARTRATE"]["COMPUTE"]:
    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["HEARTRATE"]["DB_TABLE"]))
@ -151,31 +139,27 @@ if config["SLEEP"]["COMPUTE"]:
    files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
    files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))

-for provider in config["CONVERSATION"]["PROVIDERS"].keys():    
-    if config["CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
-        pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-        pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-        for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CONVERSATION".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CONVERSATION".lower()))
+for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():    
+    if config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/phone_conversation_with_datetime_unified.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_conversation_features/phone_conversation_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_conversation.csv", pid=config["PIDS"]))

-for provider in config["LOCATIONS"]["PROVIDERS"].keys():
-    if config["LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
-        if config["LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
-            if config["LOCATIONS"]["DB_TABLE"] in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"]:
+for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys(): # for every enabled provider, validate the sensed-bins prerequisite before requesting location files
+    if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
+        if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "RESAMPLE_FUSED":
+            if "PHONE_LOCATIONS" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]: # PHONE_SENSORS replaced DB_TABLES and lists sensor keys, not table names (same check as PHONE_SCREEN)
                files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
            else:
-                raise ValueError("Error: Add your locations table (and as many sensor tables as you have) to [PHONE_VALID_SENSED_BINS][DB_TABLES] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
+                raise ValueError("Error: Add PHONE_LOCATIONS (and as many phone sensors as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLE_FUSED)")
-            
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor}_processed_{locations_to_use}_with_datetime.csv", pid=config["PIDS"], sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]))
-        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LOCATIONS".lower()))
-        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LOCATIONS".lower()))
+
+        files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))

 # visualization for data exploration
 if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
--- a/config.yaml
+++ b/config.yaml
@ -32,9 +32,12 @@ READABLE_DATETIME:
 PHONE_VALID_SENSED_BINS:
  COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
  BIN_SIZE: &bin_size 5 # (in minutes)
-  # Add as many sensor tables as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS. 
-  # If you are extracting screen or Barnett's location features, screen and locations tables are mandatory.
-  DB_TABLES: []
+  # Add as many PHONE sensors as you have; they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
+  # If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS are mandatory.
+  # You can choose any of the keys shown below, just make sure its DB_TABLE exists in your database!
+  # PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
+  # PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
+  PHONE_SENSORS: []

 PHONE_VALID_SENSED_DAYS:
  COMPUTE: False
@ -42,7 +45,7 @@ PHONE_VALID_SENSED_DAYS:
  MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)

 # Communication SMS features config, TYPES and FEATURES keys need to match
-MESSAGES:
+PHONE_MESSAGES:
  DB_TABLE: messages
  PROVIDERS:
    RAPIDS:
@ -52,10 +55,10 @@ MESSAGES:
        received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
        sent: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
      SRC_LANGUAGE: "r"
-      SRC_FOLDER: "rapids" # inside src/features/messages
+      SRC_FOLDER: "rapids" # inside src/features/phone_messages

 # Communication call features config, TYPES and FEATURES keys need to match
-CALLS:
+PHONE_CALLS:
  DB_TABLE: calls
  PROVIDERS:
    RAPIDS:
@ -66,20 +69,13 @@ CALLS:
        incoming: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
        outgoing: [count, distinctcontacts, meanduration, sumduration, minduration, maxduration, stdduration, modeduration, entropyduration, timefirstcall, timelastcall, countmostfrequentcontact]
      SRC_LANGUAGE: "r"
-      SRC_FOLDER: "rapids" # inside src/features/calls
+      SRC_FOLDER: "rapids" # inside src/features/phone_calls

-APPLICATION_GENRES:
-  CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
-  CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
-  UPDATE_CATALOGUE_FILE: false # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
-  SCRAPE_MISSING_GENRES: false # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
-
-LOCATIONS:
+PHONE_LOCATIONS:
  DB_TABLE: locations
  LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS OR FUSED_RESAMPLED  
  FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
  FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
-  TIMEZONE: *timezone
  PROVIDERS:
    DORYAB:
      COMPUTE: False
@ -90,7 +86,7 @@ LOCATIONS:
      MAXIMUM_GAP_ALLOWED: 300
      MINUTES_DATA_USED: False
      SAMPLING_FREQUENCY: 0
-      SRC_FOLDER: "doryab" # inside src/features/locations
+      SRC_FOLDER: "doryab" # inside src/features/phone_locations
      SRC_LANGUAGE: "python"

    BARNETT:
@ -99,20 +95,20 @@ LOCATIONS:
      ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
      TIMEZONE: *timezone
      MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
-      SRC_FOLDER: "barnett" # inside src/features/locations
+      SRC_FOLDER: "barnett" # inside src/features/phone_locations
      SRC_LANGUAGE: "r"

-BLUETOOTH:
+PHONE_BLUETOOTH:
  DB_TABLE: bluetooth
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
-      SRC_FOLDER: "rapids" # inside src/features/bluetooth
+      SRC_FOLDER: "rapids" # inside src/features/phone_bluetooth
      SRC_LANGUAGE: "r"


-ACTIVITY_RECOGNITION:
+PHONE_ACTIVITY_RECOGNITION:
  DB_TABLE: 
    ANDROID: plugin_google_activity_recognition
    IOS: plugin_ios_activity_recognition
@ -124,19 +120,19 @@ ACTIVITY_RECOGNITION:
        STATIONARY: ["still", "tilting"]
        MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
        VEHICLE: ["in_vehicle"]
-      SRC_FOLDER: "rapids" # inside src/features/activity_recognition
+      SRC_FOLDER: "rapids" # inside src/features/phone_activity_recognition
      SRC_LANGUAGE: "python"

-BATTERY:
+PHONE_BATTERY:
  DB_TABLE: battery
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
      FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
-      SRC_FOLDER: "rapids" # inside src/features/battery
+      SRC_FOLDER: "rapids" # inside src/features/phone_battery
      SRC_LANGUAGE: "python"

-SCREEN:
+PHONE_SCREEN:
  DB_TABLE: screen
  PROVIDERS:
    RAPIDS:
@ -146,25 +142,25 @@ SCREEN:
      IGNORE_EPISODES_LONGER_THAN: 0 # in minutes, set to 0 to disable
      FEATURES: ["countepisode", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] # "episodepersensedminutes" needs to be added later
      EPISODE_TYPES: ["unlock"]
-      SRC_FOLDER: "rapids" # inside src/features/screen
+      SRC_FOLDER: "rapids" # inside src/features/phone_screen
      SRC_LANGUAGE: "python"

-LIGHT:
+PHONE_LIGHT:
  DB_TABLE: light
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
      FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
-      SRC_FOLDER: "rapids" # inside src/features/light
+      SRC_FOLDER: "rapids" # inside src/features/phone_light
      SRC_LANGUAGE: "python"

-ACCELEROMETER:
+PHONE_ACCELEROMETER:
  DB_TABLE: accelerometer
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
      FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
-      SRC_FOLDER: "rapids" # inside src/features/accelerometer
+      SRC_FOLDER: "rapids" # inside src/features/phone_accelerometer
      SRC_LANGUAGE: "python"
    
    PANDA:
@ -173,11 +169,16 @@ ACCELEROMETER:
      FEATURES:
        exertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
        nonexertional_activity_episode: ["sumduration", "maxduration", "minduration", "avgduration", "medianduration", "stdduration"]
-      SRC_FOLDER: "panda" # inside src/features/accelerometer
+      SRC_FOLDER: "panda" # inside src/features/phone_accelerometer
      SRC_LANGUAGE: "python"

-APPLICATIONS_FOREGROUND:
+PHONE_APPLICATIONS_FOREGROUND:
  DB_TABLE: applications_foreground
+  APPLICATION_CATEGORIES:
+    CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
+    CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
+    UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
+    SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
  PROVIDERS:
    RAPIDS:
      COMPUTE: False
@ -189,9 +190,45 @@ APPLICATIONS_FOREGROUND:
      EXCLUDED_CATEGORIES: []
      EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
      FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
-      SRC_FOLDER: "rapids" # inside src/features/applications_foreground
+      SRC_FOLDER: "rapids" # inside src/features/phone_applications_foreground
      SRC_LANGUAGE: "python"

+PHONE_WIFI_VISIBLE:
+  DB_TABLE: "wifi"
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
+      SRC_FOLDER: "rapids" # inside src/features/phone_wifi_visible
+      SRC_LANGUAGE: "r"
+
+PHONE_WIFI_CONNECTED:
+  DB_TABLE: "sensor_wifi"
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
+      SRC_FOLDER: "rapids" # inside src/features/phone_wifi_connected
+      SRC_LANGUAGE: "r"
+
+PHONE_CONVERSATION:
+  DB_TABLE: 
+    ANDROID: plugin_studentlife_audio_android
+    IOS: plugin_studentlife_audio
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: False
+      FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
+        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
+        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+        "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
+        "unknownexpectedfraction","countconversation"]
+      RECORDING_MINUTES: 1
+      PAUSED_MINUTES : 3
+      SRC_FOLDER: "rapids" # inside src/features/phone_conversation
+      SRC_LANGUAGE: "python"
+
+
 HEARTRATE:
  COMPUTE: False
  DB_TABLE: fitbit_data
@ -223,34 +260,6 @@ SLEEP:
  SLEEP_TYPES: ["main", "nap", "all"]
  SUMMARY_FEATURES: ["sumdurationafterwakeup", "sumdurationasleep", "sumdurationawake", "sumdurationtofallasleep", "sumdurationinbed", "avgefficiency", "countepisode"]

-WIFI:
-  DB_TABLE: 
-    VISIBLE_ACCESS_POINTS: "wifi" # if you only have a CONNECTED_ACCESS_POINTS table, set this value to ""
-    CONNECTED_ACCESS_POINTS: "sensor_wifi" # if you only have a VISIBLE_ACCESS_POINTS table, set this value to ""
-  PROVIDERS:
-    RAPIDS:
-      COMPUTE: False
-      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
-      SRC_FOLDER: "rapids" # inside src/features/bluetooth
-      SRC_LANGUAGE: "r"
-
-CONVERSATION:
-  DB_TABLE: 
-    ANDROID: plugin_studentlife_audio_android
-    IOS: plugin_studentlife_audio
-  PROVIDERS:
-    RAPIDS:
-      COMPUTE: False
-      FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
-        "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
-        "unknownexpectedfraction","countconversation"]
-      RECORDING_MINUTES: 1
-      PAUSED_MINUTES : 3
-      SRC_FOLDER: "rapids" # inside src/features/conversation
-      SRC_LANGUAGE: "python"
-
 ### Visualizations ################################################################
 HEATMAP_FEATURES_CORRELATIONS:
  PLOT: False
--- a/rules/common.smk
+++ b/rules/common.smk
@ -14,69 +14,20 @@ def infer_participant_platform(participant_file):

    return platform

-# Preprocessing.smk ####################################################################################################
-
-def optional_phone_sensed_bins_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-    
-    if platform == "android":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    elif platform == "ios":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    return expand("data/raw/{{pid}}/{table}_with_datetime.csv", table = tables_platform)
-
-def optional_phone_sensed_timestamps_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-    
-    if platform == "android":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]] # for android, discard any ios tables that may exist
-    elif platform == "ios":
-        tables_platform = [table for table in config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"] if table not in [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"]]] # for ios, discard any android tables that may exist
-
-    return expand("data/raw/{{pid}}/{table}_raw.csv", table = tables_platform)
-
 # Features.smk #########################################################################################################
 def find_features_files(wildcards):
    feature_files = []
    for provider_key, provider in config[(wildcards.sensor_key).upper()]["PROVIDERS"].items():
        if provider["COMPUTE"]:
-            feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=(wildcards.sensor_key).lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key))
+            feature_files.extend(expand("data/interim/{{pid}}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", sensor_key=wildcards.sensor_key.lower(), language=provider["SRC_LANGUAGE"].lower(), provider_key=provider_key.lower()))
    return(feature_files)

-def optional_ar_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-    
-    if platform == "android": 
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
-    elif platform == "ios":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
-
-def optional_conversation_input(wildcards):
-    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
-    if platform == "android":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["ANDROID"])[0]
-    elif platform == "ios":
-        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["IOS"])[0]
-
 def optional_steps_sleep_input(wildcards):
    if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
        return  "data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv"
    else:
        return []

-def optional_wifi_input(wildcards):
-    if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) == 0:
-        return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])}
-    elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) == 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-        return {"connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
-    elif len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0 and len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-        return {"visible_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]), "connected_access_points": expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])}
-    else:
-        raise ValueError("If you are computing WIFI features you need to provide either VISIBLE_ACCESS_POINTS, CONNECTED_ACCESS_POINTS or both")
-
-
 # Models.smk ###########################################################################################################

 def input_merge_features_of_single_participant(wildcards):
--- a/rules/features.smk
+++ b/rules/features.smk
@ -28,341 +28,211 @@ rule resample_episodes_with_datetime:
    script:
        "../src/data/readable_datetime.R"

-rule accelerometer_r_features:
+rule phone_accelerometer_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "accelerometer"
+        sensor_key = "phone_accelerometer"
    output:
-        "data/interim/{pid}/accelerometer_features/accelerometer_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule accelerometer_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["ACCELEROMETER"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "accelerometer"
-    output:
-        "data/interim/{pid}/accelerometer_features/accelerometer_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_accelerometer_features/phone_accelerometer_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

 rule activity_recognition_episodes:
    input:
-        optional_ar_input
+        sensor_data = "data/raw/{pid}/phone_activity_recognition_with_datetime_unified.csv"
    output:
-        "data/interim/{pid}/activity_recognition_episodes.csv"
+        "data/interim/{pid}/phone_activity_recognition_episodes.csv"
    script:
-        "../src/features/activity_recognition/episodes/activity_recognition_episodes.R"
+        "../src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R"

-rule activity_recognition_r_features:
+rule phone_activity_recognition_python_features:
    input:
-        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_activity_recognition_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "activity_recognition"
+        sensor_key = "phone_activity_recognition"
    output:
-        "data/interim/{pid}/activity_recognition_features/activity_recognition_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule activity_recognition_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "activity_recognition"
-    output:
-        "data/interim/{pid}/activity_recognition_features/activity_recognition_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_activity_recognition_features/phone_activity_recognition_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

-rule applications_foreground_r_features:
+rule phone_applications_foreground_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "applications_foreground"
+        sensor_key = "phone_applications_foreground"
    output:
-        "data/interim/{pid}/applications_foreground_features/applications_foreground_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule applications_foreground_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "applications_foreground"
-    output:
-        "data/interim/{pid}/applications_foreground_features/applications_foreground_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_applications_foreground_features/phone_applications_foreground_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

 rule battery_episodes:
    input:
-        expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
+        "data/raw/{pid}/phone_battery_raw.csv"
    output:
-        "data/interim/{pid}/battery_episodes.csv"
+        "data/interim/{pid}/phone_battery_episodes.csv"
    script:
-        "../src/features/battery/episodes/battery_episodes.R"
+        "../src/features/phone_battery/episodes/battery_episodes.R"

-rule battery_r_features:
+rule phone_battery_python_features:
    input:
-        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_battery_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_BATTERY"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "battery"
+        sensor_key = "phone_battery"
    output:
-        "data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule battery_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "battery"
-    output:
-        "data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_battery_features/phone_battery_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

-rule bluetooth_r_features:
+rule phone_bluetooth_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_bluetooth_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_BLUETOOTH"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "bluetooth"
+        sensor_key = "phone_bluetooth"
    output:
-        "data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_bluetooth_features/phone_bluetooth_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"

-rule bluetooth_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "bluetooth"
-    output:
-        "data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"
-
 rule calls_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_calls_with_datetime_unified.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_CALLS"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "calls"
+        sensor_key = "phone_calls"
    output:
-        "data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule calls_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "calls"
-    output:
-        "data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"
-
-rule conversation_r_features:
-    input:
-        sensor_data = optional_conversation_input,
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "conversation"
-    output:
-        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_calls_features/phone_calls_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"

 rule conversation_python_features:
    input:
-        sensor_data = optional_conversation_input,
+        sensor_data = "data/raw/{pid}/phone_conversation_with_datetime_unified.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_CONVERSATION"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "conversation"
+        sensor_key = "phone_conversation"
    output:
-        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_conversation_features/phone_conversation_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

-rule light_r_features:
+rule phone_light_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
+        sensor_data = "data/raw/{pid}/phone_light_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_LIGHT"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "light"
+        sensor_key = "phone_light"
    output:
-        "data/interim/{pid}/light_features/light_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_light_features/phone_light_python_{provider_key}.csv"
+    script:
+        "../src/features/entry.py"
+
+rule phone_locations_r_features:
+    input:
+        sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
+        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
+    params:
+        provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
+        provider_key = "{provider_key}",
+        sensor_key = "phone_locations"
+    output:
+        "data/interim/{pid}/phone_locations_features/phone_locations_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"

-rule light_python_features:
+rule phone_locations_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
+        sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "light"
+        sensor_key = "phone_locations"
    output:
-        "data/interim/{pid}/light_features/light_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_locations_features/phone_locations_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

-rule locations_r_features:
+rule phone_messages_r_features:
    input:
-        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
+        sensor_data = "data/raw/{pid}/phone_messages_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_MESSAGES"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "locations"
+        sensor_key = "phone_messages"
    output:
-        "data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_messages_features/phone_messages_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"

-rule locations_python_features:
-    input:
-        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "locations"
-    output:
-        "data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"
-
-rule messages_r_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "messages"
-    output:
-        "data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule messages_python_features:
-    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "messages"
-    output:
-        "data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
-    script:
-        "../src/features/entry.py"
-
 rule screen_episodes:
    input:
-        screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
+        screen = "data/raw/{pid}/phone_screen_with_datetime_unified.csv"
    output:
-        "data/interim/{pid}/screen_episodes.csv"
+        "data/interim/{pid}/phone_screen_episodes.csv"
    script:
-        "../src/features/screen/episodes/screen_episodes.R"
+        "../src/features/phone_screen/episodes/screen_episodes.R"

-rule screen_r_features:
+rule phone_screen_python_features:
    input:
-        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
+        sensor_episodes = "data/interim/{pid}/phone_screen_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_SCREEN"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "screen"
+        sensor_key = "phone_screen"
    output:
-        "data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
-    script:
-        "../src/features/entry.R"
-
-rule screen_python_features:
-    input:
-        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
-        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
-    params:
-        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}",
-        sensor_key = "screen"
-    output:
-        "data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_screen_features/phone_screen_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"

-rule wifi_r_features:
+rule phone_wifi_connected_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
+        sensor_data = "data/raw/{pid}/phone_wifi_connected_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_WIFI_CONNECTED"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "wifi"
+        sensor_key = "phone_wifi_connected"
    output:
-        "data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
+        "data/interim/{pid}/phone_wifi_connected_features/phone_wifi_connected_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"

-rule wifi_python_features:
+rule phone_wifi_visible_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
+        sensor_data = "data/raw/{pid}/phone_wifi_visible_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
-        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
+        provider = lambda wildcards: config["PHONE_WIFI_VISIBLE"]["PROVIDERS"][wildcards.provider_key.upper()],
        provider_key = "{provider_key}",
-        sensor_key = "wifi"
+        sensor_key = "phone_wifi_visible"
    output:
-        "data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
+        "data/interim/{pid}/phone_wifi_visible_features/phone_wifi_visible_r_{provider_key}.csv"
    script:
-        "../src/features/entry.py"
+        "../src/features/entry.R"

 rule fitbit_heartrate_features:
    input:
--- a/rules/preprocessing.smk
+++ b/rules/preprocessing.smk
@ -29,10 +29,10 @@ rule download_dataset:
        "data/external/{pid}"
    params:
        group = config["DOWNLOAD_DATASET"]["GROUP"],
-        table = "{sensor}",
+        sensor = "{sensor}",
+        table = lambda wildcards: config[str(wildcards.sensor).upper()]["DB_TABLE"],
        timezone = config["TIMEZONE"],
-        aware_multiplatform_tables = config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["CONVERSATION"]["DB_TABLE"]["IOS"],
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
+        aware_multiplatform_tables = config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["ANDROID"] + "," + config["PHONE_CONVERSATION"]["DB_TABLE"]["IOS"],
    output:
        "data/raw/{pid}/{sensor}_raw.csv"
    script:
@ -50,35 +50,23 @@ rule compute_day_segments:
    script:
        "../src/data/compute_day_segments.py"

-PHONE_SENSORS = []
-PHONE_SENSORS.extend([config["MESSAGES"]["DB_TABLE"], config["CALLS"]["DB_TABLE"], config["LOCATIONS"]["DB_TABLE"], config["BLUETOOTH"]["DB_TABLE"], config["BATTERY"]["DB_TABLE"], config["SCREEN"]["DB_TABLE"], config["LIGHT"]["DB_TABLE"], config["ACCELEROMETER"]["DB_TABLE"], config["APPLICATIONS_FOREGROUND"]["DB_TABLE"], config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]])
-PHONE_SENSORS.extend(config["PHONE_VALID_SENSED_BINS"]["DB_TABLES"])
-
-if len(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"]) > 0:
-    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["VISIBLE_ACCESS_POINTS"])
-if len(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"]) > 0:
-    PHONE_SENSORS.append(config["WIFI"]["DB_TABLE"]["CONNECTED_ACCESS_POINTS"])
-
-
-rule readable_datetime:
+rule phone_readable_datetime:
    input:
-        sensor_input = "data/raw/{pid}/{sensor}_raw.csv",
+        sensor_input = "data/raw/{pid}/phone_{sensor}_raw.csv",
        day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
    params:
        timezones = None,
        fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
-    wildcard_constraints:
-        sensor = '(' + '|'.join([re.escape(x) for x in PHONE_SENSORS]) + ')' # only process smartphone sensors, not fitbit
    output:
-        "data/raw/{pid}/{sensor}_with_datetime.csv"
+        "data/raw/{pid}/phone_{sensor}_with_datetime.csv"
    script:
        "../src/data/readable_datetime.R"

 rule phone_sensed_bins:
    input:
-        all_sensors =  optional_phone_sensed_bins_input
+        all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
    params:
        bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
    output:
@ -88,7 +76,7 @@ rule phone_sensed_bins:

 rule phone_sensed_timestamps:
    input:
-        all_sensors = optional_phone_sensed_timestamps_input
+        all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
    output:
        "data/interim/{pid}/phone_sensed_timestamps.csv"
    script:
@ -112,55 +100,50 @@ rule unify_ios_android:
        participant_info = "data/external/{pid}"
    params:
        sensor = "{sensor}",
-        unifiable_sensors = {"calls": config["CALLS"]["DB_TABLE"], "battery": config["BATTERY"]["DB_TABLE"], "screen": config["SCREEN"]["DB_TABLE"], "ios_activity_recognition": config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"], "ios_conversation": config["CONVERSATION"]["DB_TABLE"]["IOS"]}
    output:
        "data/raw/{pid}/{sensor}_with_datetime_unified.csv"
    script:
        "../src/data/unify_ios_android.R"

-rule process_location_types:
+rule process_phone_location_types:
    input:
-        locations = "data/raw/{pid}/{sensor}_raw.csv",
+        locations = "data/raw/{pid}/phone_locations_raw.csv",
        phone_sensed_timestamps = "data/interim/{pid}/phone_sensed_timestamps.csv",
    params:
-        consecutive_threshold = config["LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
-        time_since_valid_location = config["LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
-        locations_to_use = "{locations_to_use}"
-    wildcard_constraints:
-        locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
+        consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
+        time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
+        locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
    output:
-        "data/interim/{pid}/{sensor}_processed_{locations_to_use}.csv"
+        "data/interim/{pid}/phone_locations_processed.csv"
    script:
        "../src/data/process_location_types.R"

 rule readable_datetime_location_processed:
    input:
-        sensor_input = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
+        sensor_input = "data/interim/{pid}/phone_locations_processed.csv",
        day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
    params:
        timezones = None,
        fixed_timezone = config["READABLE_DATETIME"]["FIXED_TIMEZONE"],
        day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
        include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
-    wildcard_constraints:
-        locations_to_use = '(ALL|GPS|FUSED_RESAMPLED)'
    output:
-        expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])
+        "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
    script:
        "../src/data/readable_datetime.R"

-rule application_genres:
+rule phone_application_categories:
    input:
-        "data/raw/{pid}/{sensor}_with_datetime.csv"
+        "data/raw/{pid}/phone_applications_foreground_with_datetime.csv"
    params:
-        catalogue_source = config["APPLICATION_GENRES"]["CATALOGUE_SOURCE"],
-        catalogue_file = config["APPLICATION_GENRES"]["CATALOGUE_FILE"],
-        update_catalogue_file = config["APPLICATION_GENRES"]["UPDATE_CATALOGUE_FILE"],
-        scrape_missing_genres = config["APPLICATION_GENRES"]["SCRAPE_MISSING_GENRES"]
+        catalogue_source = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_SOURCE"],
+        catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["CATALOGUE_FILE"],
+        update_catalogue_file = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["UPDATE_CATALOGUE_FILE"],
+        scrape_missing_genres = config["PHONE_APPLICATIONS_FOREGROUND"]["APPLICATION_CATEGORIES"]["SCRAPE_MISSING_CATEGORIES"]
    output:
-        "data/raw/{pid}/{sensor}_with_datetime_with_genre.csv"
+        "data/raw/{pid}/phone_applications_foreground_with_datetime_with_categories.csv"
    script:
-        "../src/data/application_genres.R"
+        "../src/data/application_categories.R"

 rule fitbit_heartrate_with_datetime:
    input:
@ -196,11 +179,3 @@ rule fitbit_sleep_with_datetime:
        intraday_data = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
    script:
        "../src/data/fitbit_readable_datetime.py"
-
-rule join_wifi_tables:
-    input: 
-        unpack(optional_wifi_input)
-    output:
-        "data/raw/{pid}/wifi_with_datetime_visibleandconnected.csv"
-    script:
-        "../src/data/join_visible_and_connected_wifi.R"
--- a/src/data/application_categories.R
+++ b/src/data/application_categories.R
--- a/src/data/assign_to_day_segment.R
+++ b/src/data/assign_to_day_segment.R
@ -2,166 +2,163 @@ library("tidyverse")
 library("lubridate")
 options(scipen=999)

-find_segments_frequency <- function(local_date, local_time, local_timezone, segments){
-  
-  assigned_segments <- segments[segments$segment_start<= local_time & segments$segment_end >= local_time, ]
-  assigned_segments["segment_start_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_start_time), tz = local_timezone)) * 1000
-  assigned_segments["segment_end_ts"] = as.numeric(lubridate::as_datetime(stringi::stri_c(local_date,assigned_segments$segment_id_end_time), tz = local_timezone)) * 1000 + 999 
-
-  return(stringi::stri_c(stringi::stri_c("[", 
-                                assigned_segments[["label"]], "#",
-                                local_date, " ",
-                                assigned_segments[["segment_id_start_time"]], ",",
-                                local_date, " ",
-                                assigned_segments[["segment_id_end_time"]], ";",
-                                assigned_segments[["segment_start_ts"]], ",",
-                                assigned_segments[["segment_end_ts"]],
-                                "]"), collapse = "|"))
+# Longest segment length, as a lubridate duration, among the day segments whose
+# `repeats_on` equals `day_type`.
+#
+# day_type: value compared against the `repeats_on` column (the caller in this
+#   file passes "wday", "mday", "qday" or "yday").
+# include_past_periodic_segments: when FALSE the delay is always zero.
+# day_segments: table with `length` and `repeats_on` columns. Optional so that
+#   existing two-argument calls keep working.
+#
+# Returns duration("0days") when no segment repeats on `day_type` or when
+# include_past_periodic_segments is FALSE; otherwise the longest length.
+day_type_delay <- function(day_type, include_past_periodic_segments, day_segments = NULL){
+  # The original body read `day_segments` as a free variable. Because this
+  # function is defined at the top level of the file, that name resolved through
+  # the defining environment and not to the table handed to the calling function.
+  # Looking it up from the caller's frame first uses the caller's table when there
+  # is one and still falls back to the enclosing/global value otherwise.
+  if (is.null(day_segments)) {
+    day_segments <- get("day_segments", envir = parent.frame())
+  }
+  delay <- day_segments %>%
+    mutate(length_duration = duration(length)) %>%
+    filter(repeats_on == day_type) %>%
+    arrange(-length_duration) %>%
+    pull(length_duration) %>%
+    first()
+  # first() on an empty vector gives NA, which is mapped to a zero delay here
+  return(if_else(is.na(delay) | include_past_periodic_segments == FALSE, duration("0days"), delay))
 }

-find_segments_periodic <- function(timestamp, segments){
-  # crossing and pivot_longer make segments a tibble, thus we need to extract [["segment_id"]]
-  return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"][["segment_id"]], collapse = "|"))
+# Build the table of calendar dates that segments of one repeat type can start on.
+#
+# data: rows with a `local_date` column.
+# local_timezone: timezone passed to lubridate::ymd() when parsing `local_date`.
+# day_type: one of "every_day", "wday", "mday", "qday", "yday"; selects which
+#   day-number column is added. Any other value returns the dates without an
+#   extra column.
+# delay: amount subtracted from the earliest date before the range is filled.
+#
+# Returns one row per day from (earliest date - delay) to the latest date, with
+# `local_date` (character), `local_date_obj` (date) and the `day_type` column.
+get_segment_dates <- function(data, local_timezone, day_type, delay){
+  observed_dates <- data %>%
+    distinct(local_date) %>%
+    mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone)))
+
+  # Fill in every day of the range, including days without any data
+  # NOTE(review): replace_na() is called without a replacement value, which looks like a no-op -- confirm
+  all_dates <- observed_dates %>%
+    complete(local_date_obj = seq(date(min(local_date_obj) - delay), max(local_date_obj), by="days")) %>%
+    mutate(local_date = replace_na(as.character(date(local_date_obj))))
+
+  # every_day uses the constant 0; the other types use the matching lubridate day number
+  switch(day_type,
+         every_day = mutate(all_dates, every_day = 0),
+         wday = mutate(all_dates, wday = wday(local_date_obj, week_start = 1)),
+         mday = mutate(all_dates, mday = mday(local_date_obj)),
+         qday = mutate(all_dates, qday = qday(local_date_obj)),
+         yday = mutate(all_dates, yday = yday(local_date_obj)),
+         all_dates)
+}

-find_segments_event <- function(timestamp, segments){
-  # segments is a data.frame, we don't need to extract [["segment_id"]] like in find_segments_periodic
-  return(stringi::stri_c(segments[[1]][segments[[1]]$segment_start_ts<= timestamp & segments[[1]]$segment_end_ts >= timestamp, "segment_id"], collapse = "|"))
+# Label each row of `nested_data` with every segment whose
+# [segment_start_ts, segment_end_ts] range contains the row's `timestamp`.
+#
+# nested_data: rows with a `timestamp` column.
+# nested_inferred_day_segments: one row per segment instance with
+#   `segment_start_ts`, `segment_end_ts` and `segment_id` columns.
+#
+# Returns `nested_data` with a character column `assigned_segments` holding the
+# matching segment ids joined by "|", or "" when no segment matches.
+assign_rows_to_segments <- function(nested_data, nested_inferred_day_segments){
+  nested_data <- nested_data %>% mutate(assigned_segments = "")
+  # seq_len() yields an empty sequence when there are no segments; 1:nrow() would
+  # iterate over c(1, 0), producing NA labels and then a zero-length assignment
+  for(i in seq_len(nrow(nested_inferred_day_segments))) {
+    segment <- nested_inferred_day_segments[i,]
+    nested_data$assigned_segments <- ifelse(segment$segment_start_ts<= nested_data$timestamp & segment$segment_end_ts >= nested_data$timestamp,
+                                            stringi::stri_c(nested_data$assigned_segments, segment$segment_id, sep = "|"), nested_data$assigned_segments)
+  }
+  # Every id was appended after a "|", so labelled rows start with a separator; drop it
+  nested_data$assigned_segments <- substring(nested_data$assigned_segments, 2)
+  return(nested_data)
+}
+
+# Label each row of `nested_data` with the frequency segment whose
+# [segment_start_ts, segment_end_ts] range contains the row's `local_time_obj`.
+#
+# nested_data: rows sharing one timezone, with `local_date`, `local_time_obj` and
+#   an existing `assigned_segments` column.
+# nested_timezone: timezone used to turn the segment's start/end time on the
+#   row's date into the timestamps embedded in the segment id.
+# day_segments: one row per segment with `label`, `segment_id_start_time`,
+#   `segment_id_end_time`, `segment_start_ts` and `segment_end_ts` columns.
+#
+# Returns `nested_data` with `assigned_segments` set to
+# "[label#date start,date end;start_ts,end_ts]" for rows inside a segment. A row
+# matched by several segments keeps only the last matching one, because each
+# match replaces the previous value.
+assign_rows_to_segments_frequency <- function(nested_data, nested_timezone, day_segments){
+  # seq_len() yields an empty sequence when there are no segments; 1:nrow() would
+  # iterate over c(1, 0), producing NA labels and then a zero-length assignment
+  for(i in seq_len(nrow(day_segments))) {
+    segment <- day_segments[i,]
+    nested_data$assigned_segments <- ifelse(segment$segment_start_ts<= nested_data$local_time_obj & segment$segment_end_ts >= nested_data$local_time_obj,
+                                            # The segment_id is assembled on the fly because it depends on each row's local_date and timezone 
+                                            stringi::stri_c("[",
+                                                            segment[["label"]], "#",
+                                                            nested_data$local_date, " ",
+                                                            segment[["segment_id_start_time"]], ",",
+                                                            nested_data$local_date, " ",
+                                                            segment[["segment_id_end_time"]], ";",
+                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_start_time), tz = nested_timezone)) * 1000, ",",
+                                                            as.numeric(lubridate::as_datetime(stringi::stri_c(nested_data$local_date, segment$segment_id_end_time), tz = nested_timezone)) * 1000 + 999,
+                                                            "]"),
+                                            nested_data$assigned_segments)
+  }
+  return(nested_data)
 }

 assign_to_day_segment <- function(sensor_data, day_segments, day_segments_type, include_past_periodic_segments){
-
+  
  if(nrow(sensor_data) == 0)
    return(sensor_data %>% mutate(assigned_segments = NA))
-
-  if(day_segments_type == "FREQUENCY"){ #FREQUENCY
+  
+  if(day_segments_type == "FREQUENCY"){
    
    day_segments <- day_segments %>% mutate(start_time = lubridate::hm(start_time),
                                            end_time = start_time + minutes(length) - seconds(1),
                                            segment_id_start_time = paste(str_pad(hour(start_time),2, pad="0"), str_pad(minute(start_time),2, pad="0"), str_pad(second(start_time),2, pad="0"),sep =":"),
                                            segment_id_end_time = paste(str_pad(hour(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(minute(ymd("1970-01-01") + end_time),2, pad="0"), str_pad(second(ymd("1970-01-01") + end_time),2, pad="0"),sep =":"), # add ymd("1970-01-01") to get a real time instead of duration
-                                            segment_start = as.numeric(start_time),
-                                            segment_end = as.numeric(end_time))
-
-    sensor_data <- sensor_data %>% mutate(local_time_obj = as.numeric(lubridate::hms(local_time)),
-                                          assigned_segments = pmap_chr(list(local_date, local_time_obj, local_timezone), find_segments_frequency, day_segments)) %>% select(-local_time_obj)
+                                            segment_start_ts = as.numeric(start_time),
+                                            segment_end_ts = as.numeric(end_time))
    
-  } else if (day_segments_type == "PERIODIC"){ #PERIODIC
+    sensor_data <- sensor_data %>% mutate(local_time_obj = as.numeric(lubridate::hms(local_time)),
+                                          assigned_segments = "")
+    
+    sensor_data <- sensor_data %>%
+      group_by(local_timezone) %>% 
+      nest() %>% 
+      mutate(data = map2(data, local_timezone, assign_rows_to_segments_frequency, day_segments)) %>% 
+      unnest(cols = data) %>% 
+      arrange(timestamp) %>% 
+      select(-local_time_obj)
+    
+    return(sensor_data)
+
+    
+  } else if (day_segments_type == "PERIODIC"){
    
    # We need to take into account segment start dates that could include the first day of data
    day_segments <- day_segments %>% mutate(length_duration = duration(length))
-    wday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>%  filter(repeats_on == "wday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    wday_delay <- if_else(is.na(wday_delay) | include_past_periodic_segments == FALSE, duration("0days"), wday_delay)
-    
-    mday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>%  filter(repeats_on == "mday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    mday_delay <- if_else(is.na(mday_delay) | include_past_periodic_segments == FALSE, duration("0days"), mday_delay)
-    
-    qday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>%  filter(repeats_on == "qday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    qday_delay <- if_else(is.na(qday_delay) | include_past_periodic_segments == FALSE, duration("0days"), qday_delay)
-    
-    yday_delay <- day_segments %>% mutate(length_duration = duration(length)) %>%  filter(repeats_on == "yday") %>% arrange(-length_duration) %>% pull(length_duration) %>% first()
-    yday_delay <- if_else(is.na(yday_delay) | include_past_periodic_segments == FALSE, duration("0days"), yday_delay)
+    every_day_delay <- duration("0days")
+    wday_delay <- day_type_delay("wday", include_past_periodic_segments)
+    mday_delay <- day_type_delay("mday", include_past_periodic_segments)
+    qday_delay <- day_type_delay("qday", include_past_periodic_segments)
+    yday_delay <- day_type_delay("yday", include_past_periodic_segments)
    
    sensor_data <- sensor_data %>%
-      # mutate(row_n = row_number()) %>% 
      group_by(local_timezone) %>% 
      nest() %>% 
      # get existent days that we need to start segments from
-      mutate(every_date = map(data, ~.x %>% 
-                                    distinct(local_date) %>% 
-                                    mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>% 
-                                    complete(local_date_obj = seq(min(local_date_obj), max(local_date_obj), by="days")) %>%
-                                    mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>% 
-                                    mutate(every_day = 0)),
-             week_dates = map(data, ~.x %>% 
-                               distinct(local_date) %>% 
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>% 
-                                complete(local_date_obj = seq(date(min(local_date_obj) - wday_delay), max(local_date_obj), by="days")) %>%
-                               mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>% 
-                               mutate(wday = wday(local_date_obj, week_start = 1))  ), 
-             month_dates = map(data, ~.x %>% 
-                                   distinct(local_date) %>% 
-                                   mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>% 
-                                   complete(local_date_obj = seq(date(min(local_date_obj) - mday_delay), max(local_date_obj), by="days")) %>%
-                                   mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                   mutate(mday = mday(local_date_obj))), 
-             quarter_dates = map(data, ~.x %>% 
-                                distinct(local_date) %>% 
-                                mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>% 
-                                complete(local_date_obj = seq(date(min(local_date_obj) - qday_delay), max(local_date_obj), by="days")) %>%
-                                mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                mutate(qday = qday(local_date_obj)) ), 
-             year_dates = map(data, ~.x %>% 
-                                 distinct(local_date) %>% 
-                                 mutate(local_date_obj = date(lubridate::ymd(local_date, tz = local_timezone))) %>% 
-                                 complete(local_date_obj = seq(date(min(local_date_obj) - yday_delay), max(local_date_obj), by="days")) %>%
-                                 mutate(local_date = replace_na(as.character(date(local_date_obj)))) %>%
-                                 mutate(yday = yday(local_date_obj)) ),
+      mutate(every_date = map2(data, local_timezone, get_segment_dates, "every_day", every_day_delay),
+             week_dates = map2(data, local_timezone, get_segment_dates, "wday", wday_delay),
+             month_dates = map2(data, local_timezone, get_segment_dates, "mday", mday_delay),
+             quarter_dates = map2(data, local_timezone, get_segment_dates, "qday", qday_delay),
+             year_dates = map2(data, local_timezone, get_segment_dates, "yday", yday_delay),
             existent_dates = pmap(list(every_date, week_dates, month_dates, quarter_dates, year_dates),
-                                    function(every_date, week_dates, month_dates, quarter_dates, year_dates) reduce(list(every_date, week_dates,month_dates, quarter_dates, year_dates), .f=full_join)),
-             every_date = NULL,
-             week_dates = NULL,
-             month_dates = NULL,
-             quarter_dates = NULL,
-             year_dates = NULL,
-             # build the actual day segments taking into account the users requested leangth and repeat schedule
+                                   function(every_date, week_dates, month_dates, quarter_dates, year_dates) reduce(list(every_date, week_dates,month_dates, quarter_dates, year_dates), .f=full_join)),
+             # build the actual day segments taking into account the user's requested length and repeat schedule
             inferred_day_segments = map(existent_dates,
                                         ~ crossing(day_segments, .x) %>%
                                           pivot_longer(cols = c(every_day,wday, mday, qday, yday), names_to = "day_type", values_to = "day_value") %>%
                                           filter(repeats_on == day_type & repeats_value == day_value) %>%
-                                           mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")), # The segment ids (label#start#end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
+                                           # The segment ids (segment_id_start and segment_id_end) are computed in UTC to avoid having different labels for instances of a segment that happen in different timezones
+                                           mutate(segment_id_start = lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM")),
                                                  segment_id_end = segment_id_start + lubridate::duration(length),
-                                                  segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000, # The actual segments are computed using timestamps taking into account the timezone
+                                                  # The actual segments are computed using timestamps taking into account the timezone
+                                                  segment_start_ts = as.numeric(lubridate::parse_date_time(paste(local_date, start_time), orders = c("Ymd HMS", "Ymd HM"), tz = local_timezone)) * 1000,
                                                  segment_end_ts = segment_start_ts + as.numeric(lubridate::duration(length)) * 1000 + 999,
                                                  segment_id = paste0("[",
-                                                                      paste0(
-                                                                            label,"#",
-                                                                            paste0(lubridate::date(segment_id_start), " ",
-                                                                                  paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
-                                                                                  lubridate::date(segment_id_end), " ",
-                                                                                  paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
-                                                                            paste0(segment_start_ts, ",", segment_end_ts)
-                                                                      ),
+                                                                      paste0(label,"#",
+                                                                             paste0(lubridate::date(segment_id_start), " ",
+                                                                                    paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
+                                                                                    lubridate::date(segment_id_end), " ",
+                                                                                    paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
+                                                                             paste0(segment_start_ts, ",", segment_end_ts)),
                                                                      "]")) %>% 
-                                           select(segment_start_ts, segment_end_ts,  segment_id) %>% 
-                                           drop_na(segment_start_ts, segment_end_ts)), # drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 1am to 3am)
-             data = map2(data, inferred_day_segments, ~ .x %>% mutate(row_date_time = as.numeric(lubridate::ymd_hms(local_date_time, tz = local_timezone)) * 1000,
-                                                                      assigned_segments = map_chr(row_date_time, ~find_segments_periodic(.x, inferred_day_segments)),
-                                                                      row_date_time = NULL))
+                                           # drop day segments with an invalid start or end time (mostly due to daylight saving changes, e.g. 2020-03-08 02:00:00 EST does not exist, clock jumps from 01:59am to 03:00am)
+                                           drop_na(segment_start_ts, segment_end_ts)), 
+             data = map2(data, inferred_day_segments, assign_rows_to_segments)
      ) %>%
-      select(-existent_dates, -inferred_day_segments) %>%
+      select(-existent_dates, -inferred_day_segments, -every_date, -week_dates, -month_dates, -quarter_dates, -year_dates) %>%
      unnest(cols = data) %>% 
      arrange(timestamp)
-    
-    
+
  } else if ( day_segments_type == "EVENT"){
    
    sensor_data <- sensor_data %>% 
      group_by(local_timezone) %>% 
      nest() %>% 
-      mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>% mutate(shift = ifelse(shift == "0", "0seconds", shift),
-                                                     segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
-                                                     segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
-                                                     segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x), # these start and end datetime objects are for labeling only
-                                                     segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
-                                                     segment_end_ts = segment_end_ts + 999,
-                                                     segment_id = paste0("[",
-                                                                         paste0(
-                                                                           label,"#",
-                                                                           paste0(lubridate::date(segment_id_start), " ",
-                                                                                  paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
-                                                                                  lubridate::date(segment_id_end), " ",
-                                                                                  paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
-                                                                           paste0(segment_start_ts, ",", segment_end_ts)
-                                                                         ),
-                                                                         "]")) %>% 
-                                           select(-segment_id_start, -segment_id_end)),
-             data = map2(data, inferred_day_segments, ~ .x %>% mutate(assigned_segments = map_chr(timestamp, ~find_segments_event(.x, inferred_day_segments))))) %>% 
+      mutate(inferred_day_segments = map(local_timezone, ~ day_segments %>% 
+                                           mutate(shift = ifelse(shift == "0", "0seconds", shift),
+                                                  segment_start_ts = event_timestamp + (as.integer(seconds(lubridate::duration(shift))) * ifelse(shift_direction >= 0, 1, -1) * 1000),
+                                                  segment_end_ts = segment_start_ts + (as.integer(seconds(lubridate::duration(length))) * 1000),
+                                                  # these start and end datetime objects are for labeling only
+                                                  segment_id_start = lubridate::as_datetime(segment_start_ts/1000, tz = .x), 
+                                                  segment_id_end = lubridate::as_datetime(segment_end_ts/1000, tz = .x),
+                                                  segment_end_ts = segment_end_ts + 999,
+                                                  segment_id = paste0("[",
+                                                                      paste0(label,"#",
+                                                                             paste0(lubridate::date(segment_id_start), " ",
+                                                                             paste(str_pad(hour(segment_id_start),2, pad="0"), str_pad(minute(segment_id_start),2, pad="0"), str_pad(second(segment_id_start),2, pad="0"),sep =":"), ",",
+                                                                             lubridate::date(segment_id_end), " ",
+                                                                             paste(str_pad(hour(segment_id_end),2, pad="0"), str_pad(minute(segment_id_end),2, pad="0"), str_pad(second(segment_id_end),2, pad="0"),sep =":")),";",
+                                                                             paste0(segment_start_ts, ",", segment_end_ts)),
+                                                                     "]"))),
+             data = map2(data, inferred_day_segments, assign_rows_to_segments)) %>% 
      select(-inferred_day_segments) %>% 
      unnest(data) %>% 
      arrange(timestamp)
-
  }
-  
+
  return(sensor_data)
 }
--- a/src/data/download_dataset.R
+++ b/src/data/download_dataset.R
@ -40,9 +40,9 @@ is_multiplaform_participant <- function(dbEngine, device_ids, platforms){
 participant <- snakemake@input[[1]]
 group <- snakemake@params[["group"]]
 table <- snakemake@params[["table"]]
+sensor <- snakemake@params[["sensor"]]
 timezone <- snakemake@params[["timezone"]]
 aware_multiplatform_tables <- str_split(snakemake@params[["aware_multiplatform_tables"]], ",")[[1]]
-unifiable_tables = snakemake@params[["unifiable_sensors"]]
 sensor_file <- snakemake@output[[1]]

 device_ids <- strsplit(readLines(participant, n=1), ",")[[1]]
@ -58,30 +58,26 @@ end_datetime_utc = format(as.POSIXct(paste0(end_date, " 23:59:59"),format="%Y/%m

 dbEngine <- dbConnect(MySQL(), default.file = "./.env", group = group)

-# Get existent columns in table
-available_columns <- colnames(dbGetQuery(dbEngine, paste0("SELECT * FROM ", table, " LIMIT 1")))
-
-if("device_id" %in% available_columns){
-  if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
-    sensor_data <- unify_raw_data(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms)
-  }else {
-    query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
-    if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
-      query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
-    sensor_data <- dbGetQuery(dbEngine, query)
+if(is_multiplaform_participant(dbEngine, device_ids, platforms)){
+  sensor_data <- unify_raw_data(dbEngine, table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms)
+}else {
+  # table has two elements for conversation and activity recognition (they store data on a different table for ios and android)
+  if(length(table) > 1){
+    table <- table[[toupper(platforms[1])]]
  }
-  
-  if("timestamp" %in% available_columns)
-    sensor_data <- sensor_data %>% arrange(timestamp)
-  
-  # Unify device_id
-  sensor_data <- sensor_data %>% mutate(device_id = unified_device_id)
-  
-  # Droping duplicates on all columns except for _id or id
-  sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))
-  
-} else 
-    stop(paste0("Table ", table, "does not have a device_id column (Aware ID) to link its data to a participant"))
+  query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")
+  if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc)
+    query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
+  sensor_data <- dbGetQuery(dbEngine, query)
+}
+
+sensor_data <- sensor_data %>% arrange(timestamp)
+
+# Unify device_id
+sensor_data <- sensor_data %>% mutate(device_id = unified_device_id)
+
+# Dropping duplicates on all columns except for _id or id
+sensor_data <- sensor_data %>% distinct(!!!syms(setdiff(names(sensor_data), c("_id", "id"))))

 write_csv(sensor_data, sensor_file)
 dbDisconnect(dbEngine)
--- a/src/data/unify_ios_android.R
+++ b/src/data/unify_ios_android.R
@ -4,11 +4,10 @@ source("src/data/unify_utils.R")
 sensor_data <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
 participant_info <- snakemake@input[["participant_info"]]
 sensor <- snakemake@params[["sensor"]]
-unifiable_sensors = snakemake@params[["unifiable_sensors"]]

 platforms <- strsplit(readLines(participant_info, n=2)[[2]], ",")[[1]]
 platform <- ifelse(platforms[1] == "multiple" | (length(platforms) > 1 & "android" %in% platforms & "ios" %in% platforms), "android", platforms[1])

-sensor_data <- unify_data(sensor_data, sensor, platform, unifiable_sensors)
+sensor_data <- unify_data(sensor_data, sensor, platform)

 write.csv(sensor_data, snakemake@output[[1]], row.names = FALSE)
--- a/src/data/unify_utils.R
+++ b/src/data/unify_utils.R
@ -101,7 +101,7 @@ clean_ios_activity_column <- function(ios_gar){
    return(ios_gar)
 }

-unify_ios_gar <- function(ios_gar){
+unify_ios_activity_recognition <- function(ios_gar){
    # We only need to unify Google Activity Recognition data for iOS
    # discard rows where activities column is blank
    ios_gar <- ios_gar[-which(ios_gar$activities == ""), ]
@ -138,7 +138,7 @@ unify_ios_conversation <- function(conversation){
 }

 # This function is used in download_dataset.R
-unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, unifiable_tables, device_ids, platforms){
+unify_raw_data <- function(dbEngine, sensor_table, sensor, start_datetime_utc, end_datetime_utc, aware_multiplatform_tables, device_ids, platforms){
  # If platforms is 'multiple', fetch each device_id's platform from aware_device, otherwise, use those given by the user
  if(length(platforms) == 1 && platforms == "multiple")
      devices_platforms <- dbGetQuery(dbEngine, paste0("SELECT device_id,brand FROM aware_device WHERE device_id IN ('", paste0(device_ids, collapse = "','"), "')")) %>% 
@ -147,8 +147,9 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
      devices_platforms <- data.frame(device_id = device_ids, platform = platforms)

  # Get existent tables in database
-  available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_type = 'base table' AND table_schema='", dbGetInfo(dbEngine)$dbname,"'")) %>% pull(table_name)
-  
+  available_tables_in_db <- dbGetQuery(dbEngine, paste0("SELECT table_name FROM information_schema.tables WHERE table_schema='", dbGetInfo(dbEngine)$dbname,"'"))[[1]]
+  if(!any(sensor_table %in% available_tables_in_db))
+    stop(paste0("You requested data from these table(s) ", paste0(sensor_table, collapse=", "), " but they don't exist in your database ", dbGetInfo(dbEngine)$dbname))
  # Parse the table names for activity recognition and conversation plugins because they are different between android and ios
  ar_tables <- setNames(aware_multiplatform_tables[1:2], c("android", "ios"))
  conversation_tables <- setNames(aware_multiplatform_tables[3:4], c("android", "ios"))
@ -160,17 +161,19 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
    platform <- row$platform
    
    # Handle special cases when tables for the same sensor have different names for Android and iOS (AR and conversation)
-    if(table %in% ar_tables)
+    if(length(sensor_table) == 1)
+        table <- sensor_table
+    else if(all(sensor_table == ar_tables))
      table <- ar_tables[[platform]]
-    else if(table %in% conversation_tables)
+    else if(all(sensor_table == conversation_tables))
      table <- conversation_tables[[platform]]

    if(table %in% available_tables_in_db){
      query <- paste0("SELECT * FROM ", table, " WHERE device_id IN ('", device_id, "')")
-      if("timestamp" %in% available_columns && !(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
+      if(!(is.na(start_datetime_utc)) && !(is.na(end_datetime_utc)) && start_datetime_utc < end_datetime_utc){
        query <- paste0(query, "AND timestamp BETWEEN 1000*UNIX_TIMESTAMP('", start_datetime_utc, "') AND 1000*UNIX_TIMESTAMP('", end_datetime_utc, "')")
      }
-      sensor_data <- unify_data(dbGetQuery(dbEngine, query), table, platform, unifiable_tables)
+      sensor_data <- unify_data(dbGetQuery(dbEngine, query), sensor, platform)
      participants_sensordata <- append(participants_sensordata, list(sensor_data))
    }else{
      warning(paste0("Missing ", table, " table. We unified the data from ", paste0(devices_platforms$device_id, collapse = " and "), " but without records from this missing table for ", device_id))
@ -182,25 +185,16 @@ unify_raw_data <- function(dbEngine, table, start_datetime_utc, end_datetime_utc
 }

 # This function is used in unify_ios_android.R and unify_raw_data function
-unify_data <- function(sensor_data, sensor, platform, unifiable_sensors){
-    if(sensor == unifiable_sensors$calls){
-        if(platform == "ios"){
-            sensor_data = unify_ios_calls(sensor_data)
-        }
-        # android calls remain unchanged
-    } else if(sensor == unifiable_sensors$battery){
-        if(platform == "ios"){
-            sensor_data = unify_ios_battery(sensor_data)
-        }
-        # android battery remains unchanged
-    } else if(sensor == unifiable_sensors$ios_activity_recognition){
-        sensor_data = unify_ios_gar(sensor_data)
-    } else if(sensor == unifiable_sensors$screen){
-        if(platform == "ios"){
-            sensor_data = unify_ios_screen(sensor_data)
-        }
-        # android screen remains unchanged
-    } else if(sensor == unifiable_sensors$ios_conversation){
+unify_data <- function(sensor_data, sensor, platform){
+    if(sensor == "phone_calls" & platform == "ios"){
+        sensor_data = unify_ios_calls(sensor_data)
+    } else if(sensor == "phone_battery" & platform == "ios"){
+        sensor_data = unify_ios_battery(sensor_data)
+    } else if(sensor == "phone_activity_recognition" & platform == "ios"){
+        sensor_data = unify_ios_activity_recognition(sensor_data)
+    } else if(sensor == "phone_screen" & platform == "ios"){
+        sensor_data = unify_ios_screen(sensor_data)
+    } else if(sensor == "phone_conversation" & platform == "ios"){
        sensor_data = unify_ios_conversation(sensor_data)
    }
    return(sensor_data)
--- a/src/features/phone_accelerometer/panda/main.py
+++ b/src/features/phone_accelerometer/panda/main.py
--- a/src/features/phone_accelerometer/rapids/main.py
+++ b/src/features/phone_accelerometer/rapids/main.py
--- a/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R
+++ b/src/features/phone_activity_recognition/episodes/activity_recognition_episodes.R
--- a/src/features/phone_activity_recognition/rapids/main.py
+++ b/src/features/phone_activity_recognition/rapids/main.py
--- a/src/features/phone_applications_foreground/rapids/main.py
+++ b/src/features/phone_applications_foreground/rapids/main.py
--- a/src/features/phone_battery/episodes/battery_episodes.R
+++ b/src/features/phone_battery/episodes/battery_episodes.R
--- a/src/features/phone_battery/rapids/main.py
+++ b/src/features/phone_battery/rapids/main.py
--- a/src/features/phone_bluetooth/rapids/main.R
+++ b/src/features/phone_bluetooth/rapids/main.R
--- a/src/features/phone_calls/rapids/main.R
+++ b/src/features/phone_calls/rapids/main.R
--- a/src/features/phone_conversation/rapids/main.py
+++ b/src/features/phone_conversation/rapids/main.py
--- a/src/features/phone_light/rapids/main.py
+++ b/src/features/phone_light/rapids/main.py
--- a/src/features/phone_locations/barnett/library/AvgFlightDur.R
+++ b/src/features/phone_locations/barnett/library/AvgFlightDur.R
--- a/src/features/phone_locations/barnett/library/AvgFlightLen.R
+++ b/src/features/phone_locations/barnett/library/AvgFlightLen.R
--- a/src/features/phone_locations/barnett/library/Collapse2Pause.R
+++ b/src/features/phone_locations/barnett/library/Collapse2Pause.R
--- a/src/features/phone_locations/barnett/library/DailyMobilityPlots.R
+++ b/src/features/phone_locations/barnett/library/DailyMobilityPlots.R
--- a/src/features/phone_locations/barnett/library/DailyRoutineIndex.R
+++ b/src/features/phone_locations/barnett/library/DailyRoutineIndex.R
--- a/src/features/phone_locations/barnett/library/DayDist.R
+++ b/src/features/phone_locations/barnett/library/DayDist.R
--- a/src/features/phone_locations/barnett/library/DistanceTravelled.R
+++ b/src/features/phone_locations/barnett/library/DistanceTravelled.R
--- a/src/features/phone_locations/barnett/library/ExtractFlights.R
+++ b/src/features/phone_locations/barnett/library/ExtractFlights.R
--- a/src/features/phone_locations/barnett/library/ExtractTimePeriod.R
+++ b/src/features/phone_locations/barnett/library/ExtractTimePeriod.R
--- a/src/features/phone_locations/barnett/library/GPS2MobMat.R
+++ b/src/features/phone_locations/barnett/library/GPS2MobMat.R
--- a/src/features/phone_locations/barnett/library/GPSmobility-internal.R
+++ b/src/features/phone_locations/barnett/library/GPSmobility-internal.R
--- a/src/features/phone_locations/barnett/library/GetMobilityFeaturesMat.R
+++ b/src/features/phone_locations/barnett/library/GetMobilityFeaturesMat.R
--- a/src/features/phone_locations/barnett/library/GuessPause.R
+++ b/src/features/phone_locations/barnett/library/GuessPause.R
--- a/src/features/phone_locations/barnett/library/Hometime.R
+++ b/src/features/phone_locations/barnett/library/Hometime.R
--- a/src/features/phone_locations/barnett/library/InitializeParams.R
+++ b/src/features/phone_locations/barnett/library/InitializeParams.R
--- a/src/features/phone_locations/barnett/library/IsFlight.R
+++ b/src/features/phone_locations/barnett/library/IsFlight.R
--- a/src/features/phone_locations/barnett/library/LatLong2XY.R
+++ b/src/features/phone_locations/barnett/library/LatLong2XY.R
--- a/src/features/phone_locations/barnett/library/LocationAt.R
+++ b/src/features/phone_locations/barnett/library/LocationAt.R
--- a/src/features/phone_locations/barnett/library/MaxDiam.R
+++ b/src/features/phone_locations/barnett/library/MaxDiam.R
--- a/src/features/phone_locations/barnett/library/MaxDistBetweenTrajectories.R
+++ b/src/features/phone_locations/barnett/library/MaxDistBetweenTrajectories.R
--- a/src/features/phone_locations/barnett/library/MaxHomeDist.R
+++ b/src/features/phone_locations/barnett/library/MaxHomeDist.R
--- a/src/features/phone_locations/barnett/library/MaxRadius.R
+++ b/src/features/phone_locations/barnett/library/MaxRadius.R
--- a/src/features/phone_locations/barnett/library/MinsMissing.R
+++ b/src/features/phone_locations/barnett/library/MinsMissing.R
--- a/src/features/phone_locations/barnett/library/MobilityFeatures.R
+++ b/src/features/phone_locations/barnett/library/MobilityFeatures.R
--- a/src/features/phone_locations/barnett/library/MobmatQualityOK.R
+++ b/src/features/phone_locations/barnett/library/MobmatQualityOK.R
--- a/src/features/phone_locations/barnett/library/ProbPause.R
+++ b/src/features/phone_locations/barnett/library/ProbPause.R
--- a/src/features/phone_locations/barnett/library/ProgressBar.R
+++ b/src/features/phone_locations/barnett/library/ProgressBar.R
--- a/src/features/phone_locations/barnett/library/RadiusOfGyration.R
+++ b/src/features/phone_locations/barnett/library/RadiusOfGyration.R
--- a/src/features/phone_locations/barnett/library/RandomBridge.R
+++ b/src/features/phone_locations/barnett/library/RandomBridge.R
--- a/src/features/phone_locations/barnett/library/SigLocEntropy.R
+++ b/src/features/phone_locations/barnett/library/SigLocEntropy.R
--- a/src/features/phone_locations/barnett/library/SigLocs.R
+++ b/src/features/phone_locations/barnett/library/SigLocs.R
--- a/src/features/phone_locations/barnett/library/SigLocsVisited.R
+++ b/src/features/phone_locations/barnett/library/SigLocsVisited.R
--- a/src/features/phone_locations/barnett/library/SimulateMobilityGaps.R
+++ b/src/features/phone_locations/barnett/library/SimulateMobilityGaps.R
--- a/src/features/phone_locations/barnett/library/StdFlightDur.R
+++ b/src/features/phone_locations/barnett/library/StdFlightDur.R
--- a/src/features/phone_locations/barnett/library/StdFlightLen.R
+++ b/src/features/phone_locations/barnett/library/StdFlightLen.R
--- a/src/features/phone_locations/barnett/library/WriteSurveyAnswers2File.R
+++ b/src/features/phone_locations/barnett/library/WriteSurveyAnswers2File.R
--- a/src/features/phone_locations/barnett/library/plot.flights.R
+++ b/src/features/phone_locations/barnett/library/plot.flights.R
--- a/src/features/phone_locations/barnett/library/plotlimits.R
+++ b/src/features/phone_locations/barnett/library/plotlimits.R
--- a/src/features/phone_locations/barnett/main.R
+++ b/src/features/phone_locations/barnett/main.R
@ -3,7 +3,7 @@ library("dplyr")
 library("stringr")

 # Load Ian Barnett's code. Taken from https://scholar.harvard.edu/ibarnett/software/gpsmobility
-file.sources = list.files(c("src/features/locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
+file.sources = list.files(c("src/features/phone_locations/barnett/library"), pattern="*.R$", full.names=TRUE, ignore.case=TRUE)
 sapply(file.sources,source,.GlobalEnv)

 create_empty_file <- function(requested_features){
@ -52,10 +52,13 @@ barnett_features <- function(sensor_data_files, day_segment, params){
  if (nrow(location) > 1){
    # Filter by segment and skipping any non-daily segment
    location <- location %>% filter_data_by_segment(day_segment)
-    segment <- location %>% head(1) %>% pull(local_segment) 
-    segment_data <- str_split(segment, "#")[[1]]
-    if(segment_data[[2]] != segment_data[[4]] || segment_data[[3]] != "00:00:00" || segment_data[[5]] != "23:59:59"){
-      warning(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping for ", segment))
+    
+    datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
+    datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
+    location <- location %>% mutate(is_daily = str_detect(local_segment, paste0(day_segment, "#", datetime_start_regex, ",", datetime_end_regex))) 
+
+    if(!all(location$is_daily)){
+      message(paste("Barnett's location features cannot be computed for day segmentes that are not daily (cover 00:00:00 to 23:59:59 of every day). Skipping ", day_segment))
      location_features <- create_empty_file(requested_features)  
    } else {
      # Count how many minutes of data we use to get location features
--- a/src/features/phone_locations/doryab/main.py
+++ b/src/features/phone_locations/doryab/main.py
--- a/src/features/phone_messages/rapids/main.R
+++ b/src/features/phone_messages/rapids/main.R
--- a/src/features/phone_screen/episodes/screen_episodes.R
+++ b/src/features/phone_screen/episodes/screen_episodes.R
--- a/src/features/phone_screen/rapids/main.py
+++ b/src/features/phone_screen/rapids/main.py
--- a/src/features/phone_wifi_connected/rapids/main.R
+++ b/src/features/phone_wifi_connected/rapids/main.R
--- a/src/features/phone_wifi_visible/rapids/main.R
+++ b/src/features/phone_wifi_visible/rapids/main.R
@ -0,0 +1,46 @@
+library(dplyr)
+
+compute_wifi_feature <- function(data, feature, day_segment){
+  # Computes one wifi feature per local_segment.
+  #
+  # data:        wifi scans; the code below relies on a `bssid` column.
+  # feature:     "countscans", "uniquedevices" or "countscansmostuniquedevice".
+  # day_segment: forwarded untouched to filter_data_by_segment() (defined
+  #              elsewhere; presumably it adds the `local_segment` column
+  #              used for grouping -- TODO confirm).
+  #
+  # Returns a data frame with `local_segment` and a single column named
+  # wifi_rapids_<feature>; any other `feature` value yields NULL.
+  segment_data <- filter_data_by_segment(data, day_segment)
+  feature_column <- paste("wifi_rapids", feature, sep = "_")
+
+  if(feature == "countscans"){
+    # Number of rows (scans) in every segment
+    return(segment_data %>%
+             group_by(local_segment) %>%
+             summarise(!!feature_column := n()))
+  }
+
+  if(feature == "uniquedevices"){
+    # Number of distinct bssid values in every segment
+    return(segment_data %>%
+             group_by(local_segment) %>%
+             summarise(!!feature_column := n_distinct(bssid)))
+  }
+
+  if(feature == "countscansmostuniquedevice"){
+    # Tag every row with the number of rows that share its bssid
+    scans_with_totals <- segment_data %>%
+      group_by(bssid) %>%
+      mutate(N = n()) %>%
+      ungroup()
+    # Keep the rows of the most scanned device(s); on a tie only the first row's device is used
+    top_device <- scans_with_totals %>%
+      filter(N == max(N)) %>%
+      head(1) %>%
+      pull(bssid)
+    # Count the scans of that single device in every segment
+    top_device_scans <- segment_data %>%
+      filter(bssid == top_device) %>%
+      group_by(local_segment) %>%
+      summarise(!!feature_column := n())
+    return(replace(top_device_scans, is.na(top_device_scans), 0))
+  }
+}
+
+rapids_features <- function(sensor_data_files, day_segment, provider){
+  # Builds the wifi feature table for one day segment.
+  #
+  # sensor_data_files: list whose "sensor_data" entry is the path of the CSV to read.
+  # day_segment:       passed through to compute_wifi_feature().
+  # provider:          list whose "FEATURES" entry names the requested features.
+  #
+  # Returns a data frame keyed by `local_segment` with one column per computed
+  # feature; it has zero rows and only `local_segment` when nothing requested
+  # is supported.
+  scans <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
+
+  # Requested features restricted to the ones this provider can compute,
+  # in the order they are listed here
+  supported_features <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
+  selected_features <- intersect(supported_features, provider[["FEATURES"]])
+
+  # Start from an empty frame and outer-join every computed feature onto it
+  empty_features <- data.frame(local_segment = character(), stringsAsFactors = FALSE)
+  Reduce(function(joined, feature_name){
+           merge(joined, compute_wifi_feature(scans, feature_name, day_segment), by="local_segment", all = TRUE)
+         },
+         selected_features,
+         empty_features)
+}
--- a/src/features/utils/resample_episodes.R
+++ b/src/features/utils/resample_episodes.R
@ -1,6 +1,8 @@
 source("renv/activate.R")
 library("dplyr")
 library("tidyr")
+library("tibble")
+options(scipen=999)

 # Using mostly indeixng instead of tidyr because is faster
 resampled_episodes <- read.csv(snakemake@input[[1]]) 
--- a/src/features/utils/utils.py
+++ b/src/features/utils/utils.py
@ -74,7 +74,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
    sensor_features = pd.DataFrame(columns=["local_segment"])
    day_segments_labels = pd.read_csv(day_segments_file, header=0)
    if "FEATURES" not in provider:
-        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key))
+        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key.upper()))

    if provider["COMPUTE"] == True: