Update AR module for segments; Refactor input format

2020-10-07 18:11:06 -04:00 · 2020-10-07 18:11:06 -04:00 · 236b1cd809
parent 01ab59a3b6
commit 236b1cd809
43 changed files with 571 additions and 766 deletions
--- a/26
+++ b/26
@ -55,16 +55,22 @@ for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
-if config["ACTIVITY_RECOGNITION"]["COMPUTE"]:
+for provider in config["ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
-    pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
+    if config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
-    pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
+        pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-    
+        pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
-    for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
+        
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
+        for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/processed/{pid}/{sensor}_deltas.csv", pid=pids, sensor=table))
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-    files_to_compute.extend(expand("data/processed/{pid}/activity_recognition_{day_segment}.csv",pid=config["PIDS"], day_segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]))
+        
        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes.csv", pid=config["PIDS"]))
        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
        files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACTIVITY_RECOGNITION".lower()))
        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACTIVITY_RECOGNITION".lower()))
 for provider in config["BATTERY"]["PROVIDERS"].keys():
    if config["BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
--- a/config.yaml
+++ b/config.yaml
@ -113,12 +113,19 @@ BLUETOOTH:
 ACTIVITY_RECOGNITION:
  COMPUTE: False
  DB_TABLE: 
    ANDROID: plugin_google_activity_recognition
    IOS: plugin_ios_activity_recognition
-  DAY_SEGMENTS: *day_segments
+  PROVIDERS:
-  FEATURES: ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
+    RAPIDS:
      COMPUTE: False
      FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
      ACTIVITY_CLASSES:
        STATIONARY: ["still", "tilting"]
        MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
        VEHICLE: ["in_vehicle"]
      SRC_FOLDER: "rapids" # inside src/features/activity_recognition
      SRC_LANGUAGE: "python"
 BATTERY:
  DB_TABLE: battery
--- a/rules/common.smk
+++ b/rules/common.smk
@ -46,21 +46,19 @@ def find_features_files(wildcards):
 def optional_ar_input(wildcards):
    platform = infer_participant_platform("data/external/"+wildcards.pid)
-
+    
    if platform == "android": 
-        return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv",
+        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
                "data/interim/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_episodes.csv"]
    elif platform == "ios":
-        return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv",
+        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
                "data/interim/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_episodes.csv"]
 def optional_conversation_input(wildcards):
    platform = infer_participant_platform("data/external/"+wildcards.pid)
    if platform == "android":
-        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv"]
+        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["ANDROID"])[0]
    elif platform == "ios":
-        return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime_unified.csv"]
+        return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["IOS"])[0]
 def optional_steps_sleep_input(wildcards):
    if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
--- a/rules/features.smk
+++ b/rules/features.smk
@ -6,70 +6,6 @@ rule join_features_from_providers:
    script:
        "../src/features/join_features_from_providers.R"
 rule messages_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
    script:
        "../src/features/messages/messages_entry.R"
 rule messages_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
    script:
        "../src/features/messages/messages_entry.py"
 rule calls_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
    script:
        "../src/features/calls/calls_entry.py"
 rule calls_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
    script:
        "../src/features/calls/calls_entry.R"
 rule battery_episodes:
    input:
        expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
    output:
        "data/interim/{pid}/battery_episodes.csv"
    script:
        "../src/features/battery/episodes/battery_episodes.R"
 rule screen_episodes:
    input:
        screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
    output:
        "data/interim/{pid}/screen_episodes.csv"
    script:
        "../src/features/screen/episodes/screen_episodes.R"
 rule resample_episodes:
    input:
        "data/interim/{pid}/{sensor}_episodes.csv"
@ -92,178 +28,6 @@ rule resample_episodes_with_datetime:
    script:
        "../src/data/readable_datetime.R"
 rule google_activity_recognition_deltas:
    input:
        expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
    output:
        expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
    script:
        "../src/features/ar/episodes/activity_recognition_episodes.R"
 rule ios_activity_recognition_deltas:
    input:
        expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
    output:
        expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
    script:
        "../src/features/ar/episodes/activity_recognition_episodes.R"
 rule locations_python_features:
    input:
        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
    output:
        "data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
    script:
        "../src/features/locations/locations_entry.py"
 rule locations_r_features:
    input:
        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
    script:
        "../src/features/locations/locations_entry.R"
 rule bluetooth_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
    script:
        "../src/features/bluetooth/bluetooth_entry.R"
 rule bluetooth_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
    script:
        "../src/features/bluetooth/bluetooth_entry.py"
 rule activity_features:
    input:
        optional_ar_input
    params:
        segment = "{day_segment}",
        features = config["ACTIVITY_RECOGNITION"]["FEATURES"]
    output:
        "data/processed/{pid}/activity_recognition_{day_segment}.csv"
    script:
        "../src/features/activity_recognition.py"
 rule battery_r_features:
    input:
        battery_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
    script:
        "../src/features/battery/battery_entry.R"
 rule battery_python_features:
    input:
        battery_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
    script:
        "../src/features/battery/battery_entry.py"
 rule screen_r_features:
    input:
        screen_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
    script:
        "../src/features/screen/screen_entry.R"
 rule screen_python_features:
    input:
        screen_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
    script:
        "../src/features/screen/screen_entry.py"
 rule light_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/light_features/light_r_{provider_key}.csv"
    script:
        "../src/features/light/light_entry.R"
 rule light_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/light_features/light_python_{provider_key}.csv"
    script:
        "../src/features/light/light_entry.py"
 rule conversation_r_features:
    input:
        sensor_data = optional_conversation_input,
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
    script:
        "../src/features/conversation/conversation_entry.R"
 rule conversation_python_features:
    input:
        sensor_data = optional_conversation_input,
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
    script:
        "../src/features/conversation/conversation_entry.py"
 rule accelerometer_features:
    input:
        expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"]),
@ -278,53 +42,315 @@ rule accelerometer_features:
    script:
        "../src/features/accelerometer_features.py"
 rule activity_recognition_episodes:
    input:
        optional_ar_input
    output:
        "data/interim/{pid}/activity_recognition_episodes.csv"
    script:
        "../src/features/activity_recognition/episodes/activity_recognition_episodes.R"
 rule activity_recognition_r_features:
    input:
        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "activity_recognition"
    output:
        "data/interim/{pid}/activity_recognition_features/activity_recognition_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule activity_recognition_python_features:
    input:
        sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "activity_recognition"
    output:
        "data/interim/{pid}/activity_recognition_features/activity_recognition_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule applications_foreground_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]),
+        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}"
+        provider_key = "{provider_key}",
        sensor_key = "applications_foreground"
    output:
        "data/interim/{pid}/applications_foreground_features/applications_foreground_r_{provider_key}.csv"
    script:
-        "../src/features/applications_foreground/applications_foreground_entry.R"
+        "../src/features/entry.R"
 rule applications_foreground_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]),
+        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}"
+        provider_key = "{provider_key}",
        sensor_key = "applications_foreground"
    output:
        "data/interim/{pid}/applications_foreground_features/applications_foreground_python_{provider_key}.csv"
    script:
-        "../src/features/applications_foreground/applications_foreground_entry.py"
+        "../src/features/entry.py"
 rule battery_episodes:
    input:
        expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
    output:
        "data/interim/{pid}/battery_episodes.csv"
    script:
        "../src/features/battery/episodes/battery_episodes.R"
 rule battery_r_features:
    input:
        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "battery"
    output:
        "data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule battery_python_features:
    input:
        sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "battery"
    output:
        "data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule bluetooth_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "bluetooth"
    output:
        "data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule bluetooth_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "bluetooth"
    output:
        "data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule calls_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "calls"
    output:
        "data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule calls_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "calls"
    output:
        "data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule conversation_r_features:
    input:
        sensor_data = optional_conversation_input,
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "conversation"
    output:
        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule conversation_python_features:
    input:
        sensor_data = optional_conversation_input,
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "conversation"
    output:
        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule light_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "light"
    output:
        "data/interim/{pid}/light_features/light_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule light_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "light"
    output:
        "data/interim/{pid}/light_features/light_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule locations_r_features:
    input:
        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "locations"
    output:
        "data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule locations_python_features:
    input:
        sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "locations"
    output:
        "data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule messages_r_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "messages"
    output:
        "data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule messages_python_features:
    input:
        sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "messages"
    output:
        "data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule screen_episodes:
    input:
        screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
    output:
        "data/interim/{pid}/screen_episodes.csv"
    script:
        "../src/features/screen/episodes/screen_episodes.R"
 rule screen_r_features:
    input:
        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "screen"
    output:
        "data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
    script:
        "../src/features/entry.R"
 rule screen_python_features:
    input:
        sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}",
        sensor_key = "screen"
    output:
        "data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
    script:
        "../src/features/entry.py"
 rule wifi_r_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
+        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}"
+        provider_key = "{provider_key}",
        sensor_key = "wifi"
    output:
        "data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
    script:
-        "../src/features/wifi/wifi_entry.R"
+        "../src/features/entry.R"
 rule wifi_python_features:
    input:
-        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
+        sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
        day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
    params:
        provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
-        provider_key = "{provider_key}"
+        provider_key = "{provider_key}",
        sensor_key = "wifi"
    output:
        "data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
    script:
-        "../src/features/wifi/wifi_entry.py"
+        "../src/features/entry.py"
 rule fitbit_heartrate_features:
    input:
--- a/src/features/activity_recognition.py
+++ b/src/features/activity_recognition.py
@ -1,15 +0,0 @@
 import pandas as pd
 from ar.ar_base import base_ar_features
 # Script body: reads two input CSVs, computes the requested activity
 # recognition features through base_ar_features and writes one CSV.
 # `snakemake` is not defined in this file; NOTE(review): presumably injected
 # by Snakemake's `script:` directive - confirm against the calling rule.
 ar_data = pd.read_csv(snakemake.input[0],parse_dates=["local_date_time"])
 ar_deltas = pd.read_csv(snakemake.input[1],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
 day_segment = snakemake.params["segment"]
 requested_features = snakemake.params["features"]
 # Start from a frame that only has the "local_date" key so the outer merge
 # keeps every row returned by base_ar_features.
 ar_features = pd.DataFrame(columns=["local_date"])
 ar_features = ar_features.merge(base_ar_features(ar_data, ar_deltas, day_segment, requested_features), on="local_date", how="outer")
 # Sanity check: one column per requested feature plus the "local_date" key.
 assert len(requested_features) + 1 == ar_features.shape[1], "The number of features in the output dataframe (=" + str(ar_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your activity recognition feature extraction functions"
 ar_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/activity_recognition/episodes/activity_recognition_episodes.R
+++ b/src/features/activity_recognition/episodes/activity_recognition_episodes.R
--- a/src/features/activity_recognition/rapids/main.py
+++ b/src/features/activity_recognition/rapids/main.py
@ -0,0 +1,123 @@
 import pandas as pd
 import numpy as np
 def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute activity recognition features per segment from episode data.

    Args:
        sensor_data_files: mapping whose "sensor_episodes" entry is the path of
            the episodes CSV. The code reads the columns "local_segment",
            "episode_id", "activity_type", "activity_name" and "duration".
        day_segment: value handed unchanged to filter_data_by_segment.
        provider: provider configuration. "FEATURES" lists the requested
            feature names and "ACTIVITY_CLASSES" maps a class name (matched
            case-insensitively against "duration<class>") to activity labels.
        filter_data_by_segment: callable(data, day_segment) returning the
            episodes to aggregate.
        **kwargs: must contain "chunk_episodes", a callable applied to the
            filtered episodes before aggregation.

    Returns:
        A DataFrame with a "local_segment" column plus one
        "ar_rapids_<feature>" column per computed feature. The column names
        are the same whether or not any episode survives filtering.
    """
    chunk_episodes = kwargs["chunk_episodes"]
    ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
    activity_classes = provider["ACTIVITY_CLASSES"]
    # name of the features this function can compute
    base_features_names = ["count","mostcommonactivity","countuniqueactivities","durationstationary","durationmobile","durationvehicle"]
    # the subset of requested features this function can compute
    requested_features = provider["FEATURES"]
    features_to_compute = list(set(requested_features) & set(base_features_names))
    # empty result that is returned when there is nothing to aggregate
    ar_features = pd.DataFrame(columns=["local_segment"] + ["ar_rapids_" + x for x in features_to_compute])
    if not ar_episodes.empty:
        ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
        if not ar_episodes.empty:
            # chunk episodes
            ar_episodes = chunk_episodes(ar_episodes)
        if not ar_episodes.empty:
            ar_features = pd.DataFrame()
            if "count" in features_to_compute:
                ar_features["ar_rapids_count"] = ar_episodes.groupby(["local_segment"]).count()["episode_id"]
            if "mostcommonactivity" in features_to_compute:
                ar_features["ar_rapids_mostcommonactivity"] = ar_episodes.groupby(["local_segment"])["activity_type"].agg(lambda x: pd.Series.mode(x)[0])
            if "countuniqueactivities" in features_to_compute:
                ar_features["ar_rapids_countuniqueactivities"] = ar_episodes.groupby(["local_segment"])["activity_type"].nunique()
            # duration features
            for column, activity_labels in activity_classes.items():
                # Use the same lower-case feature name for the membership test
                # and the output column, so the column matches the names in
                # base_features_names and in the empty result above.
                feature_name = "duration" + column.lower()
                if feature_name in features_to_compute:
                    filtered_data = ar_episodes[ar_episodes["activity_name"].isin(pd.Series(activity_labels))]
                    if not filtered_data.empty:
                        ar_features["ar_rapids_" + feature_name] = filtered_data.groupby(["local_segment"])["duration"].sum().fillna(0)
                    else:
                        ar_features["ar_rapids_" + feature_name] = 0
            ar_features.index.names = ["local_segment"]
            ar_features = ar_features.reset_index()
    return ar_features
--- a/src/features/applications_foreground/applications_foreground_entry.R
+++ b/src/features/applications_foreground/applications_foreground_entry.R
@ -1,13 +0,0 @@
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 # Script body: read the paths and provider settings from the `snakemake`
 # object, compute the "applications_foreground" features and write them out.
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 # Subset params to the "provider" entry, then extract that element.
 provider <- snakemake@params["provider"][["provider"]]
 # NOTE(review): single-bracket subset, so this is not extracted with [[ ]]
 # like the values above - confirm fetch_provider_features expects that.
 provider_key <- snakemake@params["provider_key"]
 sensor_features <- fetch_provider_features(provider, provider_key, "applications_foreground", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/applications_foreground/applications_foreground_entry.py
+++ b/src/features/applications_foreground/applications_foreground_entry.py
@ -1,18 +0,0 @@
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # Load utils.py by file path (<parent of the script directory>/utils/utils.py)
 # and take fetch_provider_features from it, instead of a regular import.
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # "sensor_data" is indexed with [0]. NOTE(review): presumably because the
 # rule declares it through expand(), which yields a list - confirm.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # Compute the "applications_foreground" features and write them as CSV.
 sensor_features = fetch_provider_features(provider, provider_key, "applications_foreground", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/applications_foreground/rapids/main.py
+++ b/src/features/applications_foreground/rapids/main.py
@ -9,28 +9,31 @@ def compute_features(filtered_data, apps_type, requested_features, apps_features
    if "timeoffirstuse" in requested_features:
        time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_segment", keep="first").set_index("local_segment")
        if time_first_event.empty:
-            apps_features["apps_rapids" + "_timeoffirstuse" + apps_type] = np.nan
+            apps_features["apps_rapids_timeoffirstuse" + apps_type] = np.nan
        else:
-            apps_features["apps_rapids" + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
+            apps_features["apps_rapids_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
    if "timeoflastuse" in requested_features:
        time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_segment", keep="first").set_index("local_segment")
        if time_last_event.empty:
-            apps_features["apps_rapids" + "_timeoflastuse" + apps_type] = np.nan
+            apps_features["apps_rapids_timeoflastuse" + apps_type] = np.nan
        else:
-            apps_features["apps_rapids" + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
+            apps_features["apps_rapids_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
    if "frequencyentropy" in requested_features:
        apps_with_count = filtered_data.groupby(["local_segment","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
        if (len(apps_with_count.index) < 2 ):
-            apps_features["apps_rapids" + "_frequencyentropy" + apps_type] = np.nan
+            apps_features["apps_rapids_frequencyentropy" + apps_type] = np.nan
        else:    
-            apps_features["apps_rapids" + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_segment")["timestamp"].agg(entropy)
+            apps_features["apps_rapids_frequencyentropy" + apps_type] = apps_with_count.groupby("local_segment")["timestamp"].agg(entropy)
    if "count" in requested_features:
-        apps_features["apps_rapids" + "_count" + apps_type] = filtered_data.groupby(["local_segment"]).count()["timestamp"]
+        apps_features["apps_rapids_count" + apps_type] = filtered_data.groupby(["local_segment"]).count()["timestamp"]
-        apps_features.fillna(value={"apps_rapids" + "_count" + apps_type: 0}, inplace=True)
+        apps_features.fillna(value={"apps_rapids_count" + apps_type: 0}, inplace=True)
    return apps_features
-def rapids_features(apps_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    apps_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    excluded_categories = provider["EXCLUDED_CATEGORIES"]
    excluded_apps = provider["EXCLUDED_APPS"]
@ -49,10 +52,8 @@ def rapids_features(apps_data, day_segment, provider, filter_data_by_segment, *a
    apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)]
    # exclude apps in the excluded_apps list
    apps_data = apps_data[~apps_data["package_name"].isin(excluded_apps)]
-
+    apps_features = pd.DataFrame(columns=["local_segment"] + ["apps_rapids_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]])
    apps_features = pd.DataFrame(columns=["local_segment"] + ["apps_rapids_" + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]])
    if not apps_data.empty:
        # deep copy the apps_data for the top1global computation
        apps_data_global = apps_data.copy()
--- a/src/features/ar/ar_base.py
+++ b/src/features/ar/ar_base.py
@ -1,63 +0,0 @@
 import pandas as pd
 import numpy as np
 import scipy.stats as stats
 from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
 def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
    """Compute per-day activity recognition features for one day segment.

    Args:
        ar_data: activity samples; the code reads "local_date_time",
            "activity_type" and, for non-"daily" segments, "local_day_segment".
        ar_deltas: activity episodes; the code reads "activity",
            "local_start_date" and "time_diff".
        day_segment: segment label; "daily" skips the per-segment filtering.
        requested_features: feature names requested by the caller; only those
            listed in base_features_names are computed.

    Returns:
        DataFrame with a "local_date" column and one
        "ar_<day_segment>_<feature>" column per computed feature. When ar_data
        is empty, or no sample is left after segment filtering, the frame has
        those columns and no rows.
    """
    # name of the features this function can compute
    base_features_names = ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
    # the subset of requested features this function can compute
    features_to_compute = list(set(requested_features) & set(base_features_names))
    ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
    if not ar_data.empty:
        # Project helpers from features_utils; NOTE(review): presumably they
        # split episodes at day / segment boundaries - confirm in that module.
        ar_deltas = splitOvernightEpisodes(ar_deltas, [],["activity"])
        if day_segment != "daily":
            ar_deltas = splitMultiSegmentEpisodes(ar_deltas, day_segment, [])
        # Index the samples by timestamp so they can be resampled per day.
        ar_data.local_date_time = pd.to_datetime(ar_data.local_date_time)
        resampledData = ar_data.set_index(ar_data.local_date_time)
        resampledData.drop(columns=["local_date_time"], inplace=True)
        if day_segment != "daily":
            resampledData = resampledData.loc[resampledData["local_day_segment"] == day_segment]
        if not resampledData.empty:
            ar_features = pd.DataFrame()
            # finding the count of samples of the day
            if "count" in features_to_compute:
                ar_features["ar_" + day_segment + "_count"] = resampledData["activity_type"].resample("D").count()
            # finding most common activity of the day
            if "mostcommonactivity" in features_to_compute:
                ar_features["ar_" + day_segment + "_mostcommonactivity"] = resampledData["activity_type"].resample("D").apply(lambda x: stats.mode(x)[0] if len(stats.mode(x)[0]) != 0 else None)
            # finding different number of activities during a day
            if "countuniqueactivities" in features_to_compute:
                ar_features["ar_" + day_segment + "_countuniqueactivities"] = resampledData["activity_type"].resample("D").nunique()
            # finding Number of times activity changed
            if "activitychangecount" in features_to_compute:
                # Compare every sample with the previous one; the gap left by
                # shift() is filled from the first sample's own value.
                resampledData["activity_type_shift"] = resampledData["activity_type"].shift().fillna(resampledData["activity_type"].head(1))
                resampledData["different_activity"] = np.where(resampledData["activity_type"]!=resampledData["activity_type_shift"],1,0)
                ar_features["ar_" + day_segment + "_activitychangecount"] = resampledData["different_activity"].resample("D").sum()
            # Feature name -> activity labels whose episode time is summed.
            deltas_features = {"sumstationary":["still","tilting"], 
                            "summobile":["on_foot","walking","running","on_bicycle"],
                            "sumvehicle":["in_vehicle"]}
            for column, activity_labels in deltas_features.items():
                if column in features_to_compute:
                    filtered_data = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))]
                    if not filtered_data.empty:
                        # Sum "time_diff" per "local_start_date"; the assignment
                        # aligns the result on the index of ar_features.
                        ar_features["ar_" + day_segment + "_" + column] = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))].groupby(["local_start_date"])["time_diff"].sum().fillna(0)
                    else:
                        ar_features["ar_" + day_segment + "_" + column] = 0
            # Turn the date index into the "local_date" column.
            ar_features.index.names = ["local_date"]
            ar_features = ar_features.reset_index()
    return ar_features
--- a/src/features/battery/battery_entry.R
+++ b/src/features/battery/battery_entry.R
@ -1,13 +0,0 @@
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 # Script body: compute the "battery" provider features and write them as CSV.
 # The sensor input is the entry named "battery_episodes".
 sensor_data_file <-  snakemake@input[["battery_episodes"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 # Subset params to the "provider" entry, then extract that element.
 provider <- snakemake@params["provider"][["provider"]]
 # NOTE(review): single-bracket subset, not extracted with [[ ]] - confirm
 # this is what fetch_provider_features expects.
 provider_key <- snakemake@params["provider_key"]
 sensor_features <- fetch_provider_features(provider, provider_key, "battery", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/battery/battery_entry.py
+++ b/src/features/battery/battery_entry.py
@ -1,18 +0,0 @@
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # Load utils.py by file path (<parent of the script directory>/utils/utils.py)
 # and take fetch_provider_features from it, instead of a regular import.
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # The sensor input is the entry named "battery_episodes", used as given
 # (no [0] indexing).
 battery_episodes_file = snakemake.input["battery_episodes"]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # Compute the "battery" features and write them as CSV without the index.
 sensor_features = fetch_provider_features(provider, provider_key, "battery", battery_episodes_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/battery/rapids/main.py
+++ b/src/features/battery/rapids/main.py
@ -1,8 +1,9 @@
 import pandas as pd
 from datetime import datetime, timedelta, time
-def rapids_features(battery_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
-
+    
    battery_data = pd.read_csv(sensor_data_files["sensor_episodes"])
    chunk_episodes = kwargs["chunk_episodes"]
    # name of the features this function can compute
--- a/src/features/battery_features.py
+++ b/src/features/battery_features.py
@ -1,13 +0,0 @@
 import pandas as pd
 from battery.battery_base import base_battery_features
 # Script body: read the battery CSV (first input), compute the requested
 # features through base_battery_features and write one CSV.
 battery_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
 day_segment = snakemake.params["day_segment"]
 requested_features = snakemake.params["features"]
 # Start from a frame that only has the "local_date" key so the outer merge
 # keeps every row returned by base_battery_features.
 battery_features = pd.DataFrame(columns=["local_date"])
 battery_features = battery_features.merge(base_battery_features(battery_data, day_segment, requested_features), on="local_date", how="outer")
 # Sanity check: one column per requested feature plus the "local_date" key.
 assert len(requested_features) + 1 == battery_features.shape[1], "The number of features in the output dataframe (=" + str(battery_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your battery feature extraction functions"
 battery_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/bluetooth/bluetooth_entry.R
+++ b/src/features/bluetooth/bluetooth_entry.R
@ -1,13 +0,0 @@
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 # Script body: compute the "bluetooth" provider features and write them as CSV.
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 # Subset params to the "provider" entry, then extract that element.
 provider <- snakemake@params["provider"][["provider"]]
 # NOTE(review): single-bracket subset, not extracted with [[ ]] - confirm
 # this is what fetch_provider_features expects.
 provider_key <- snakemake@params["provider_key"]
 sensor_features <- fetch_provider_features(provider, provider_key, "bluetooth", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/bluetooth/bluetooth_entry.py
+++ b/src/features/bluetooth/bluetooth_entry.py
@ -1,18 +0,0 @@
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # Load utils.py by file path (<parent of the script directory>/utils/utils.py)
 # and take fetch_provider_features from it, instead of a regular import.
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # "sensor_data" is indexed with [0]. NOTE(review): presumably because the
 # rule declares it through expand(), which yields a list - confirm.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # Compute the "bluetooth" features and write them as CSV without the index.
 sensor_features = fetch_provider_features(provider, provider_key, "bluetooth", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/bluetooth/rapids/main.R
+++ b/src/features/bluetooth/rapids/main.R
@ -27,24 +27,26 @@ compute_bluetooth_feature <- function(data, feature, day_segment){
  }
 }
-rapids_features <- function(bluetooth_data, day_segment, provider){
+rapids_features <- function(sensor_data_files, day_segment, provider){
-    requested_features <- provider[["FEATURES"]]
+  
-    
+  bluetooth_data <-  read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
-    # Output dataframe
+  requested_features <- provider[["FEATURES"]]
-    features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
+  
  # Output dataframe
  features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
-    # The name of the features this function can compute
+  # The name of the features this function can compute
-    base_features_names  <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
+  base_features_names  <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
-    # The subset of requested features this function can compute
+  # The subset of requested features this function can compute
-    features_to_compute  <- intersect(base_features_names, requested_features)
+  features_to_compute  <- intersect(base_features_names, requested_features)
-    for(feature_name in features_to_compute){
+  for(feature_name in features_to_compute){
-      feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
+    feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
-      features <- merge(features, feature, by="local_segment", all = TRUE)
+    features <- merge(features, feature, by="local_segment", all = TRUE)
-    }
+  }
-    features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
+  features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
-    return(features)
+  return(features)
 }
--- a/src/features/calls/calls_entry.py
+++ b/src/features/calls/calls_entry.py
@ -1,18 +0,0 @@
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # Load utils.py by file path (<parent of the script directory>/utils/utils.py)
 # and take fetch_provider_features from it, instead of a regular import.
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # "sensor_data" is indexed with [0]. NOTE(review): presumably because the
 # rule declares it through expand(), which yields a list - confirm.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # Compute the "calls" features and write them as CSV without the index.
 sensor_features = fetch_provider_features(provider, provider_key, "calls", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/calls/rapids/main.R
+++ b/src/features/calls/rapids/main.R
@ -62,8 +62,9 @@ call_features_of_type <- function(calls, call_type, day_segment, requested_featu
    return(features)
 }
-rapids_features <- function(calls, day_segment, provider){
+rapids_features <- function(sensor_data_files, day_segment, provider){
-    calls <- calls %>% filter_data_by_segment(day_segment)
+    calls_data <-  read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
    calls_data <- calls_data %>% filter_data_by_segment(day_segment)
    call_types = provider[["CALL_TYPES"]]
    call_features <- setNames(data.frame(matrix(ncol=1, nrow=0)), c("local_segment"))
@ -74,7 +75,7 @@ rapids_features <- function(calls, day_segment, provider){
            stop(paste("Call type can online be incoming, outgoing or missed but instead you typed: ", call_type, " in config[CALLS][CALL_TYPES]"))
        requested_features <- provider[["FEATURES"]][[call_type]]
-        calls_of_type <- calls %>% filter(call_type == call_type_label)
+        calls_of_type <- calls_data %>% filter(call_type == call_type_label)
        features <- call_features_of_type(calls_of_type, call_type, day_segment, requested_features)
        call_features <- merge(call_features, features, all=TRUE)
--- a/src/features/conversation/conversation_entry.R
+++ b/src/features/conversation/conversation_entry.R
@ -1,13 +0,0 @@
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 # Script body: compute the "conversation" provider features and write them
 # as CSV.
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 # Subset params to the "provider" entry, then extract that element.
 provider <- snakemake@params["provider"][["provider"]]
 # NOTE(review): single-bracket subset, not extracted with [[ ]] - confirm
 # this is what fetch_provider_features expects.
 provider_key <- snakemake@params["provider_key"]
 sensor_features <- fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/conversation/conversation_entry.py
+++ b/src/features/conversation/conversation_entry.py
@ -1,18 +0,0 @@
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # Load utils.py by file path (<parent of the script directory>/utils/utils.py)
 # and take fetch_provider_features from it, instead of a regular import.
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # "sensor_data" is indexed with [0]. NOTE(review): presumably because the
 # rule declares it through expand(), which yields a list - confirm.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # Compute the "conversation" features and write them as CSV without the index.
 sensor_features = fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/conversation/rapids/main.py
+++ b/src/features/conversation/rapids/main.py
@ -1,8 +1,9 @@
 import pandas as pd
 import numpy as np
-# def rapids_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
-def rapids_features(conversation_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+
    conversation_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    recordingMinutes = provider["RECORDING_MINUTES"]
@ -20,7 +21,7 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
    # the subset of requested features this function can compute
    features_to_compute = list(set(requested_features) & set(base_features_names))
-    conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids"  + "_" + x for x in features_to_compute])
+    conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids_" + x for x in features_to_compute])
    if not conversation_data.empty:
        conversation_data = filter_data_by_segment(conversation_data, day_segment)
@ -30,19 +31,19 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
            conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
            if "minutessilence" in features_to_compute:
-                conversation_features["conversation_rapids" + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
+                conversation_features["conversation_rapids_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
            if "minutesnoise" in features_to_compute:
-                conversation_features["conversation_rapids" + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
+                conversation_features["conversation_rapids_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
            if "minutesvoice" in features_to_compute:
-                conversation_features["conversation_rapids" + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
+                conversation_features["conversation_rapids_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
            if "minutesunknown" in features_to_compute:
-                conversation_features["conversation_rapids" + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
+                conversation_features["conversation_rapids_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
            if "countconversation" in features_to_compute:
-                conversation_features["conversation_rapids" + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
+                conversation_features["conversation_rapids_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
            conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
            conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
@ -50,43 +51,43 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
            conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_segment"])['inference'].count()/60 
            if "silencesensedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+                conversation_features["conversation_rapids_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
            if "noisesensedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+                conversation_features["conversation_rapids_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
            if "voicesensedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+                conversation_features["conversation_rapids_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
            if "unknownsensedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+                conversation_features["conversation_rapids_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
            if "silenceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+                conversation_features["conversation_rapids_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
            if "noiseexpectedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+                conversation_features["conversation_rapids_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
            if "voiceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+                conversation_features["conversation_rapids_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
            if "unknownexpectedfraction" in features_to_compute:
-                conversation_features["conversation_rapids" + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+                conversation_features["conversation_rapids_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
            if "sumconversationduration" in features_to_compute:
-                conversation_features["conversation_rapids" + "_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
+                conversation_features["conversation_rapids_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
            if "avgconversationduration" in features_to_compute:
-                conversation_features["conversation_rapids" + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
+                conversation_features["conversation_rapids_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
            if "sdconversationduration" in features_to_compute:
-                conversation_features["conversation_rapids" + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
+                conversation_features["conversation_rapids_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
            if "minconversationduration" in features_to_compute:
-                conversation_features["conversation_rapids" + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
+                conversation_features["conversation_rapids_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
            if "maxconversationduration" in features_to_compute:
-                conversation_features["conversation_rapids" + "_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
+                conversation_features["conversation_rapids_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
            if "timefirstconversation" in features_to_compute:
                timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].min()
@ -94,9 +95,9 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
                    for date in list(timestampsLastConversation.index):
                        lastimestamp =  timestampsLastConversation.loc[date]
                        lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
-                        conversation_features.loc[date,"conversation_rapids" + "_timefirstconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
+                        conversation_features.loc[date,"conversation_rapids_timefirstconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
                else:
-                    conversation_features["conversation_rapids" + "_timefirstconversation"] = np.nan
+                    conversation_features["conversation_rapids_timefirstconversation"] = np.nan
            if "timelastconversation" in features_to_compute:
                timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].max()
@ -104,39 +105,39 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
                    for date in list(timestampsLastConversation.index):
                        lastimestamp =  timestampsLastConversation.loc[date]
                        lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
-                        conversation_features.loc[date,"conversation_rapids" + "_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
+                        conversation_features.loc[date,"conversation_rapids_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
                else:
-                    conversation_features["conversation_rapids" + "_timelastconversation"] = np.nan
+                    conversation_features["conversation_rapids_timelastconversation"] = np.nan
            if "noisesumenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noisesumenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].sum()
+                conversation_features["conversation_rapids_noisesumenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].sum()
            if "noiseavgenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noiseavgenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].mean()
+                conversation_features["conversation_rapids_noiseavgenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].mean()
            if "noisesdenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noisesdenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].std()
+                conversation_features["conversation_rapids_noisesdenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].std()
            if "noiseminenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noiseminenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].min()
+                conversation_features["conversation_rapids_noiseminenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].min()
            if "noisemaxenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_noisemaxenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].max()
+                conversation_features["conversation_rapids_noisemaxenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].max()
            if "voicesumenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voicesumenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].sum()
+                conversation_features["conversation_rapids_voicesumenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].sum()
            if "voiceavgenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voiceavgenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].mean()
+                conversation_features["conversation_rapids_voiceavgenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].mean()
            if "voicesdenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voicesdenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].std()
+                conversation_features["conversation_rapids_voicesdenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].std()
            if "voiceminenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voiceminenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].min()
+                conversation_features["conversation_rapids_voiceminenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].min()
            if "voicemaxenergy" in features_to_compute:
-                conversation_features["conversation_rapids" + "_voicemaxenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].max()
+                conversation_features["conversation_rapids_voicemaxenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].max()
            conversation_features = conversation_features.reset_index()
--- a/src/features/calls/calls_entry.R
+++ b/src/features/calls/calls_entry.R
@ -3,11 +3,14 @@ source("src/features/utils/utils.R")
 # Entry script body: collects the Snakemake inputs and params, delegates the
 # work to fetch_provider_features, and writes the returned features as CSV.
 library("dplyr")
 library("tidyr")
-sensor_data_file <-  snakemake@input[["sensor_data"]]
+sensor_data_files <- snakemake@input
-day_segments_file <-  snakemake@input[["day_segments_labels"]]
+sensor_data_files$day_segments_labels <- NULL
 # NOTE(review): the NULL assignment presumably removes day_segments_labels from
 # sensor_data_files (R list semantics); that input is still read separately here.
 day_segments_file <- snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 sensor_key <- snakemake@params["sensor_key"]
-sensor_features <- fetch_provider_features(provider, provider_key, "calls", sensor_data_file, day_segments_file)
+sensor_features <- fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file)
-write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
+write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/entry.py
+++ b/src/features/entry.py
@ -0,0 +1,14 @@
 # Generic feature entry script: reads inputs and params from the snakemake
 # object, delegates to fetch_provider_features, and writes the result as CSV.
 import pandas as pd
 from utils.utils import fetch_provider_features
 # Build a dict from snakemake.input and delete the day segments entry, so the
 # dict passed on as sensor_data_files holds every input except that one.
 sensor_data_files = dict(snakemake.input)
 del sensor_data_files["day_segments_labels"]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 sensor_key = snakemake.params["sensor_key"]
 sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file)
 # index=False keeps the index out of the written file.
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/light/light_entry.R
+++ b/src/features/light/light_entry.R
@ -1,13 +0,0 @@
 # Entry script for light features: reads inputs and params from the snakemake
 # object, runs fetch_provider_features, and writes the features as CSV.
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 # "light" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features <- fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/light/light_entry.py
+++ b/src/features/light/light_entry.py
@ -1,18 +0,0 @@
 # Entry script for light features: loads fetch_provider_features from the
 # shared utils module by file path, runs it, and writes the features as CSV.
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # import fetch_provider_features from src/features/utils/utils.py
 # (the path is built as <parent of snakemake.scriptdir>/utils/utils.py)
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # Only the first element of the "sensor_data" input is used.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # "light" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features = fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/light/rapids/main.py
+++ b/src/features/light/rapids/main.py
@ -1,33 +1,35 @@
 import pandas as pd
 import numpy as np
-def rapids_features(light_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    light_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    # name of the features this function can compute
    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
    # the subset of requested features this function can compute
    features_to_compute = list(set(requested_features) & set(base_features_names))
-    light_features = pd.DataFrame(columns=["local_segment"] + ["light_rapids_" + "_" + x for x in features_to_compute])
+    light_features = pd.DataFrame(columns=["local_segment"] + ["light_rapids_" + x for x in features_to_compute])
    if not light_data.empty:
        light_data = filter_data_by_segment(light_data, day_segment)
        if not light_data.empty:
            light_features = pd.DataFrame()
            if "count" in features_to_compute:
-                light_features["light_rapids_" + "_count"] = light_data.groupby(["local_segment"]).count()["timestamp"]
+                light_features["light_rapids_count"] = light_data.groupby(["local_segment"]).count()["timestamp"]
            # get light ambient luminance related features
            if "maxlux" in features_to_compute:
-                light_features["light_rapids_" + "_maxlux"] = light_data.groupby(["local_segment"])["double_light_lux"].max()
+                light_features["light_rapids_maxlux"] = light_data.groupby(["local_segment"])["double_light_lux"].max()
            if "minlux" in features_to_compute:
-                light_features["light_rapids_" + "_minlux"] = light_data.groupby(["local_segment"])["double_light_lux"].min()
+                light_features["light_rapids_minlux"] = light_data.groupby(["local_segment"])["double_light_lux"].min()
            if "avglux" in features_to_compute:
-                light_features["light_rapids_" + "_avglux"] = light_data.groupby(["local_segment"])["double_light_lux"].mean()
+                light_features["light_rapids_avglux"] = light_data.groupby(["local_segment"])["double_light_lux"].mean()
            if "medianlux" in features_to_compute:
-                light_features["light_rapids_" + "_medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
+                light_features["light_rapids_medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
            if "stdlux" in features_to_compute:
-                light_features["light_rapids_" + "_stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std()
+                light_features["light_rapids_stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std()
            light_features = light_features.reset_index()
--- a/src/features/locations/barnett/main.R
+++ b/src/features/locations/barnett/main.R
@ -27,8 +27,11 @@ create_empty_file <- function(requested_features){
                      ) %>% select(all_of(requested_features)))
 }
-barnett_features <- function(location_data, day_segment, params){
+barnett_features <- function(sensor_data_files, day_segment, params){
  location_data <-  read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
  location_features <- NULL
  location <- location_data
  accuracy_limit <- params[["ACCURACY_LIMIT"]]
  timezone <- params[["TIMEZONE"]]
--- a/src/features/locations/doryab/main.py
+++ b/src/features/locations/doryab/main.py
@ -4,7 +4,9 @@ from astropy.timeseries import LombScargle
 from sklearn.cluster import DBSCAN
 from math import radians, cos, sin, asin, sqrt
-def doryab_features(location_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+def doryab_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    location_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    dbscan_eps = provider["DBSCAN_EPS"]
    dbscan_minsamples = provider["DBSCAN_MINSAMPLES"]
--- a/src/features/locations/locations_entry.R
+++ b/src/features/locations/locations_entry.R
@ -1,13 +0,0 @@
 # Entry script for location features: reads inputs and params from the
 # snakemake object, runs fetch_provider_features, and writes the features as CSV.
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 # "locations" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features <- fetch_provider_features(provider, provider_key, "locations", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/locations/locations_entry.py
+++ b/src/features/locations/locations_entry.py
@ -1,18 +0,0 @@
 # Entry script for location features: loads fetch_provider_features from the
 # shared utils module by file path, runs it, and writes the features as CSV.
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # import fetch_provider_features from src/features/utils/utils.py
 # (the path is built as <parent of snakemake.scriptdir>/utils/utils.py)
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # Only the first element of the "sensor_data" input is used.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # "locations" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features = fetch_provider_features(provider, provider_key, "locations", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/messages/messages_entry.R
+++ b/src/features/messages/messages_entry.R
@ -1,13 +0,0 @@
 # Entry script for message features: reads inputs and params from the
 # snakemake object, runs fetch_provider_features, and writes the features as CSV.
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 # "messages" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features <- fetch_provider_features(provider, provider_key, "messages", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/messages/messages_entry.py
+++ b/src/features/messages/messages_entry.py
@ -1,18 +0,0 @@
 # Entry script for message features: loads fetch_provider_features from the
 # shared utils module by file path, runs it, and writes the features as CSV.
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # import fetch_provider_features from src/features/utils/utils.py
 # (the path is built as <parent of snakemake.scriptdir>/utils/utils.py)
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # Only the first element of the "sensor_data" input is used.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # "messages" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features = fetch_provider_features(provider, provider_key, "messages", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/messages/rapids/main.R
+++ b/src/features/messages/rapids/main.R
@ -50,8 +50,9 @@ message_features_of_type <- function(messages, messages_type, day_segment, reque
    return(features)
 }
-rapids_features <- function(messages, day_segment, provider){
+rapids_features <- function(sensor_data_files, day_segment, provider){
-    messages <- messages %>% filter_data_by_segment(day_segment)
+    messages_data <-  read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
    messages_data <- messages_data %>% filter_data_by_segment(day_segment)
    messages_types = provider[["MESSAGES_TYPES"]]
    messages_features <- setNames(data.frame(matrix(ncol=1, nrow=0)), c("local_segment"))
@ -62,7 +63,7 @@ rapids_features <- function(messages, day_segment, provider){
            stop(paste("Message type can online be received or sent but instead you typed: ", message_type, " in config[MESSAGES][MESSAGES_TYPES]"))
        requested_features <- provider[["FEATURES"]][[message_type]]
-        messages_of_type <- messages %>% filter(message_type == message_type_label)
+        messages_of_type <- messages_data %>% filter(message_type == message_type_label)
        features <- message_features_of_type(messages_of_type, message_type, day_segment, requested_features)
        messages_features <- merge(messages_features, features, all=TRUE)
--- a/src/features/screen/rapids/main.py
+++ b/src/features/screen/rapids/main.py
@ -25,7 +25,9 @@ def getEpisodeDurationFeatures(screen_data, day_segment, episode, features, refe
    return duration_helper
-def rapids_features(screen_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    screen_data = pd.read_csv(sensor_data_files["sensor_episodes"])
    reference_hour_first_use = provider["REFERENCE_HOUR_FIRST_USE"]
    requested_features_episodes = provider["FEATURES"]
--- a/src/features/screen/screen_entry.R
+++ b/src/features/screen/screen_entry.R
@ -1,13 +0,0 @@
 # Entry script for screen features: reads inputs and params from the
 # snakemake object, runs fetch_provider_features, and writes the features as CSV.
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 # Unlike the other sensor entry scripts, the data comes from the
 # "screen_episodes" input rather than "sensor_data".
 sensor_data_file <-  snakemake@input[["screen_episodes"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 # "screen" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features <- fetch_provider_features(provider, provider_key, "screen", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/screen/screen_entry.py
+++ b/src/features/screen/screen_entry.py
@ -1,18 +0,0 @@
 # Entry script for screen features: loads fetch_provider_features from the
 # shared utils module by file path, runs it, and writes the features as CSV.
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # import fetch_provider_features from src/features/utils/utils.py
 # (the path is built as <parent of snakemake.scriptdir>/utils/utils.py)
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # The data comes from the "screen_episodes" input, passed on whole (no [0]).
 screen_episodes_file = snakemake.input["screen_episodes"]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # "screen" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features = fetch_provider_features(provider, provider_key, "screen", screen_episodes_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)
--- a/src/features/utils/utils.R
+++ b/src/features/utils/utils.R
@ -43,24 +43,23 @@ chunk_episodes <- function(sensor_episodes){
  return(chunked_episodes)
 }
-fetch_provider_features <- function(provider, provider_key, config_key, sensor_data_file, day_segments_file){
+fetch_provider_features <- function(provider, provider_key, sensor_key, sensor_data_files, day_segments_file){
    sensor_features  <-  data.frame(local_segment = character(), stringsAsFactors = FALSE)
    sensor_data <-  read.csv(sensor_data_file, stringsAsFactors = FALSE)
    day_segments_labels <-  read.csv(day_segments_file, stringsAsFactors = FALSE)
    if(!"FEATURES" %in% names(provider))
-        stop(paste0("Provider config[", config_key,"][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
+        stop(paste0("Provider config[", sensor_key,"][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
    if(provider[["COMPUTE"]] == TRUE){
-        code_path <- paste0("src/features/", config_key,"/", provider[["SRC_FOLDER"]], "/main.R")  
+        code_path <- paste0("src/features/", sensor_key,"/", provider[["SRC_FOLDER"]], "/main.R")  
        source(code_path)
        features_function <- match.fun(paste0(provider[["SRC_FOLDER"]], "_features"))
        day_segments <- day_segments_labels %>% pull(label)
        for (day_segment in day_segments){
-            print(paste(rapids_log_tag,"Processing", config_key, provider_key, day_segment))
+            print(paste(rapids_log_tag,"Processing", sensor_key, provider_key, day_segment))
-            features <- features_function(sensor_data, day_segment, provider)
+            features <- features_function(sensor_data_files, day_segment, provider)
            # Check all features names contain the provider key so they are unique
            features_names <- colnames(features %>% select(-local_segment))
--- a/src/features/utils/utils.py
+++ b/src/features/utils/utils.py
@ -67,24 +67,24 @@ def chunk_episodes(sensor_episodes):
    return merged_sensor_episodes
-def fetch_provider_features(provider, provider_key, config_key, sensor_data_file, day_segments_file):
+def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file):
    import pandas as pd
    from importlib import import_module, util
    sensor_features = pd.DataFrame(columns=["local_segment"])
    sensor_data = pd.read_csv(sensor_data_file)
    day_segments_labels = pd.read_csv(day_segments_file, header=0)
    if "FEATURES" not in provider:
-        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(config_key.upper(), provider_key))
+        raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key))
    if provider["COMPUTE"] == True:
-            code_path = provider["SRC_FOLDER"] + ".main"
+
            code_path =  sensor_key + "." + provider["SRC_FOLDER"] + ".main"
            feature_module = import_module(code_path)
            feature_function = getattr(feature_module,  provider["SRC_FOLDER"] + "_features")
            for day_segment in day_segments_labels["label"]:
-                    print("{} Processing {} {} {}".format(rapids_log_tag, config_key, provider_key, day_segment))
+                    print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, day_segment))
-                    features = feature_function(sensor_data, day_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
+                    features = feature_function(sensor_data_files, day_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
                    sensor_features = sensor_features.merge(features, how="outer")
    else:
            for feature in provider["FEATURES"]:
--- a/src/features/wifi/rapids/main.R
+++ b/src/features/wifi/rapids/main.R
@ -25,21 +25,22 @@ compute_wifi_feature <- function(data, feature, day_segment){
  }
 }
-rapids_features <- function(wifi_data, day_segment, provider){
+rapids_features <- function(sensor_data_files, day_segment, provider){
 # Computes the wifi features requested in provider[["FEATURES"]] for one day
 # segment. The "+" side reads the wifi data from the CSV at
 # sensor_data_files[["sensor_data"]]; the "-" side received it as wifi_data.
 # Each supported feature comes from compute_wifi_feature and is merged into
 # the result on local_segment with all = TRUE, so no segment row is dropped.
 # Returns the merged data frame; requested features not listed in
 # base_features_names are ignored.
-    requested_features <- provider[["FEATURES"]]
+  wifi_data <-  read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
-    # Output dataframe
+  requested_features <- provider[["FEATURES"]]
-    features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
+  # Output dataframe
  features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
-    # The name of the features this function can compute
+  # The name of the features this function can compute
-    base_features_names  <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
+  base_features_names  <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
-    # The subset of requested features this function can compute
+  # The subset of requested features this function can compute
-    features_to_compute  <- intersect(base_features_names, requested_features)
+  features_to_compute  <- intersect(base_features_names, requested_features)
-    for(feature_name in features_to_compute){
+  for(feature_name in features_to_compute){
-      feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
+    feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
-      features <- merge(features, feature, by="local_segment", all = TRUE)
+    features <- merge(features, feature, by="local_segment", all = TRUE)
-    }
+  }
-    return(features)
+  return(features)
 }
--- a/src/features/wifi/wifi_entry.R
+++ b/src/features/wifi/wifi_entry.R
@ -1,13 +0,0 @@
 # Entry script for wifi features: reads inputs and params from the snakemake
 # object, runs fetch_provider_features, and writes the features as CSV.
 source("renv/activate.R")
 source("src/features/utils/utils.R")
 library("dplyr")
 library("tidyr")
 sensor_data_file <-  snakemake@input[["sensor_data"]]
 day_segments_file <-  snakemake@input[["day_segments_labels"]]
 provider <- snakemake@params["provider"][["provider"]]
 provider_key <- snakemake@params["provider_key"]
 # "wifi" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features <- fetch_provider_features(provider, provider_key, "wifi", sensor_data_file, day_segments_file)
 write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
--- a/src/features/wifi/wifi_entry.py
+++ b/src/features/wifi/wifi_entry.py
@ -1,18 +0,0 @@
 # Entry script for wifi features: loads fetch_provider_features from the
 # shared utils module by file path, runs it, and writes the features as CSV.
 import pandas as pd
 from importlib import import_module, util
 from pathlib import Path
 # import fetch_provider_features from src/features/utils/utils.py
 # (the path is built as <parent of snakemake.scriptdir>/utils/utils.py)
 spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
 mod = util.module_from_spec(spec)
 spec.loader.exec_module(mod)
 fetch_provider_features = getattr(mod,  "fetch_provider_features")
 # Only the first element of the "sensor_data" input is used.
 sensor_data_file = snakemake.input["sensor_data"][0]
 day_segments_file = snakemake.input["day_segments_labels"]
 provider = snakemake.params["provider"]
 provider_key = snakemake.params["provider_key"]
 # "wifi" is the hard-coded sensor name handed to fetch_provider_features.
 sensor_features = fetch_provider_features(provider, provider_key, "wifi", sensor_data_file, day_segments_file)
 sensor_features.to_csv(snakemake.output[0], index=False)