From e6438810583a6aa33def4ed6d97ac43a552d741d Mon Sep 17 00:00:00 2001
From: JulioV
Date: Tue, 1 Sep 2020 12:01:24 -0400
Subject: [PATCH] Migrate light to new file structure

---
 Snakefile                         | 10 ++++++----
 config.yaml                       | 10 +++++++---
 rules/features.smk                | 25 +++++++++++++++++++------
 src/features/light/light_base.py  | 34 ----------------------------------
 src/features/light/light_entry.R  | 13 +++++++++++++
 src/features/light/light_entry.py | 18 ++++++++++++++++++
 src/features/light/rapids/main.py | 34 ++++++++++++++++++++++++++++++++++
 7 files changed, 97 insertions(+), 47 deletions(-)
 delete mode 100644 src/features/light/light_base.py
 create mode 100644 src/features/light/light_entry.R
 create mode 100644 src/features/light/light_entry.py
 create mode 100644 src/features/light/rapids/main.py

diff --git a/Snakefile b/Snakefile
index b7364f84..38133b8f 100644
--- a/Snakefile
+++ b/Snakefile
@@ -83,10 +83,12 @@ if config["SCREEN"]["COMPUTE"]:
     files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
     files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
 
-if config["LIGHT"]["COMPUTE"]:
-    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-    files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
-    files_to_compute.extend(expand("data/processed/{pid}/light_{day_segment}.csv", pid = config["PIDS"], day_segment = config["LIGHT"]["DAY_SEGMENTS"]))
+for provider in config["LIGHT"]["PROVIDERS"].keys():
+    if config["LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
+        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
 
 if config["ACCELEROMETER"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
diff --git a/config.yaml b/config.yaml
index 5011e834..603e7a36 100644
--- a/config.yaml
+++ b/config.yaml
@@ -136,10 +136,14 @@ SCREEN:
     EPISODE_TYPES: ["unlock"]
 
 LIGHT:
-    COMPUTE: False
     DB_TABLE: light
-    DAY_SEGMENTS: *day_segments
-    FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
+    PROVIDERS:
+        RAPIDS:
+            COMPUTE: TRUE
+            FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
+            SRC_FOLDER: "rapids" # inside src/features/light
+            SRC_LANGUAGE: "python"
+
 
 ACCELEROMETER:
     COMPUTE: False
sensor=config["LIGHT"]["DB_TABLE"]), + day_segments_labels = "data/interim/day_segments_labels.csv" params: - day_segment = "{day_segment}", - features = config["LIGHT"]["FEATURES"], + provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key], + provider_key = "{provider_key}" output: - "data/processed/{pid}/light_{day_segment}.csv" + "data/interim/{pid}/light_features/light_r_{provider_key}.csv" script: - "../src/features/light_features.py" + "../src/features/light/light_entry.R" + +rule light_python_features: + input: + sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]), + day_segments_labels = "data/interim/day_segments_labels.csv" + params: + provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key], + provider_key = "{provider_key}" + output: + "data/interim/{pid}/light_features/light_python_{provider_key}.csv" + script: + "../src/features/light/light_entry.py" rule conversation_features: input: diff --git a/src/features/light/light_base.py b/src/features/light/light_base.py deleted file mode 100644 index 54450000..00000000 --- a/src/features/light/light_base.py +++ /dev/null @@ -1,34 +0,0 @@ -import pandas as pd -import numpy as np - -def base_light_features(light_data, day_segment, requested_features): - # name of the features this function can compute - base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"] - # the subset of requested features this function can compute - features_to_compute = list(set(requested_features) & set(base_features_names)) - - light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute]) - if not light_data.empty: - if day_segment != "daily": - light_data =light_data[light_data["local_day_segment"] == day_segment] - - if not light_data.empty: - light_features = pd.DataFrame() - if "count" in features_to_compute: - light_features["light_" + day_segment + "_count"] = light_data.groupby(["local_date"]).count()["timestamp"] - - # get light ambient luminance related features - if "maxlux" in features_to_compute: - light_features["light_" + day_segment + "_maxlux"] = light_data.groupby(["local_date"])["double_light_lux"].max() - if "minlux" in features_to_compute: - light_features["light_" + day_segment + "_minlux"] = light_data.groupby(["local_date"])["double_light_lux"].min() - if "avglux" in features_to_compute: - light_features["light_" + day_segment + "_avglux"] = light_data.groupby(["local_date"])["double_light_lux"].mean() - if "medianlux" in features_to_compute: - light_features["light_" + day_segment + "_medianlux"] = light_data.groupby(["local_date"])["double_light_lux"].median() - if "stdlux" in features_to_compute: - light_features["light_" + day_segment + "_stdlux"] = light_data.groupby(["local_date"])["double_light_lux"].std().fillna('NA') - - light_features = light_features.reset_index() - - return light_features \ No newline at end of file diff --git a/src/features/light/light_entry.R b/src/features/light/light_entry.R new file mode 100644 index 00000000..99f8dca4 --- /dev/null +++ b/src/features/light/light_entry.R @@ -0,0 +1,13 @@ +source("renv/activate.R") +source("src/features/utils/utils.R") +library("dplyr") +library("tidyr") + +sensor_data_file <- snakemake@input[["sensor_data"]] +day_segments_file <- snakemake@input[["day_segments_labels"]] +provider <- snakemake@params["provider"][["provider"]] +provider_key <- snakemake@params["provider_key"] + 
diff --git a/src/features/light/light_entry.R b/src/features/light/light_entry.R
new file mode 100644
index 00000000..99f8dca4
--- /dev/null
+++ b/src/features/light/light_entry.R
@@ -0,0 +1,13 @@
+source("renv/activate.R")
+source("src/features/utils/utils.R")
+library("dplyr")
+library("tidyr")
+
+sensor_data_file <- snakemake@input[["sensor_data"]]
+day_segments_file <- snakemake@input[["day_segments_labels"]]
+provider <- snakemake@params[["provider"]]
+provider_key <- snakemake@params[["provider_key"]]
+
+sensor_features <- fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
+
+write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
diff --git a/src/features/light/light_entry.py b/src/features/light/light_entry.py
new file mode 100644
index 00000000..f0ee5e8f
--- /dev/null
+++ b/src/features/light/light_entry.py
@@ -0,0 +1,18 @@
+import pandas as pd
+from importlib import import_module, util
+from pathlib import Path
+
+# import fetch_provider_features from src/features/utils/utils.py
+spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
+mod = util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+fetch_provider_features = getattr(mod, "fetch_provider_features")
+
+sensor_data_file = snakemake.input["sensor_data"][0]
+day_segments_file = snakemake.input["day_segments_labels"]
+provider = snakemake.params["provider"]
+provider_key = snakemake.params["provider_key"]
+
+sensor_features = fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
+
+sensor_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file
diff --git a/src/features/light/rapids/main.py b/src/features/light/rapids/main.py
new file mode 100644
index 00000000..105c1f49
--- /dev/null
+++ b/src/features/light/rapids/main.py
@@ -0,0 +1,34 @@
+import pandas as pd
+import numpy as np
+
+def rapids_features(light_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+    requested_features = provider["FEATURES"]
+    # name of the features this function can compute
+    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
+    # the subset of requested features this function can compute
+    features_to_compute = list(set(requested_features) & set(base_features_names))
+
+    light_features = pd.DataFrame(columns=["local_segment"] + ["light_rapids_" + x for x in features_to_compute])
+    if not light_data.empty:
+        light_data = filter_data_by_segment(light_data, day_segment)
+
+        if not light_data.empty:
+            light_features = pd.DataFrame()
+            if "count" in features_to_compute:
+                light_features["light_rapids_count"] = light_data.groupby(["local_segment"]).count()["timestamp"]
+
+            # get light ambient luminance related features
+            if "maxlux" in features_to_compute:
+                light_features["light_rapids_maxlux"] = light_data.groupby(["local_segment"])["double_light_lux"].max()
+            if "minlux" in features_to_compute:
+                light_features["light_rapids_minlux"] = light_data.groupby(["local_segment"])["double_light_lux"].min()
+            if "avglux" in features_to_compute:
+                light_features["light_rapids_avglux"] = light_data.groupby(["local_segment"])["double_light_lux"].mean()
+            if "medianlux" in features_to_compute:
+                light_features["light_rapids_medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
+            if "stdlux" in features_to_compute:
+                light_features["light_rapids_stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std().fillna('NA')
+
+            light_features = light_features.reset_index()
+
+    return light_features
\ No newline at end of file
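
Note on the wildcard wiring: with the config.yaml defaults above (a single RAPIDS provider whose SRC_LANGUAGE is "python"), the Snakefile loop requests exactly the interim file that rule light_python_features declares as its output, so Snakemake binds provider_key to "RAPIDS" and runs light_entry.py once per participant. A quick way to see the path that gets requested; the participant id "p01" below is only a placeholder:

    from snakemake.io import expand

    # The same expand() call the Snakefile makes for the default LIGHT provider:
    # language comes from SRC_LANGUAGE ("python"), provider_key from the dict key ("RAPIDS").
    print(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv",
                 pid=["p01"], language="python", provider_key="RAPIDS", sensor_key="LIGHT".lower()))
    # ['data/interim/p01/light_features/light_python_RAPIDS.csv']
    # which matches the output pattern of rule light_python_features:
    #   data/interim/{pid}/light_features/light_python_{provider_key}.csv

The final target data/processed/features/{pid}/light.csv requested by the same loop is presumably produced by a merge rule defined elsewhere; this patch does not touch it.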
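
The new provider module can also be smoke-tested outside Snakemake. The sketch below is illustrative only: the stub filter_data_by_segment and the toy data frame stand in for what fetch_provider_features (defined in src/features/utils/utils.py, not part of this patch) passes to rapids_features.

    import pandas as pd
    from importlib import util

    # Load the provider module the same way light_entry.py does (path relative to the repo root).
    spec = util.spec_from_file_location("light_rapids", "src/features/light/rapids/main.py")
    light_rapids = util.module_from_spec(spec)
    spec.loader.exec_module(light_rapids)

    # Illustrative stand-in for the helper injected by fetch_provider_features: it only has to
    # return rows carrying a local_segment column for rapids_features to group by.
    def filter_data_by_segment(data, day_segment):
        tagged = data.copy()
        tagged["local_segment"] = day_segment
        return tagged

    # Toy input with the two columns rapids_features reads: timestamp and double_light_lux.
    light_data = pd.DataFrame({
        "timestamp": [1598918400000, 1598918460000, 1598918520000],
        "double_light_lux": [10.0, 30.0, 20.0],
    })
    provider = {"FEATURES": ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]}

    features = light_rapids.rapids_features(light_data, "daily", provider, filter_data_by_segment)
    print(features)  # one row per local_segment with light_rapids_count, light_rapids_maxlux, ...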