From 1ce76a5380049338a2d705b5276435e2e15b8ea7 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Wed, 1 Apr 2020 18:29:53 -0400
Subject: [PATCH] Refactor light features

---
 src/features/light/light_base.py | 34 ++++++++++++++++++++++++++++++++
 src/features/light_metrics.py    | 29 +++++----------------------
 2 files changed, 39 insertions(+), 24 deletions(-)
 create mode 100644 src/features/light/light_base.py

diff --git a/src/features/light/light_base.py b/src/features/light/light_base.py
new file mode 100644
index 00000000..e00843fb
--- /dev/null
+++ b/src/features/light/light_base.py
@@ -0,0 +1,34 @@
+import pandas as pd
+
+def base_light_features(light_data, day_segment, requested_features):
+    """Aggregate light sensor rows into one row of features per local_date.
+
+    Only the requested features this function supports are computed; columns
+    are named "light_<day_segment>_<feature>". When there are no rows for the
+    segment, an empty frame with the expected columns is returned.
+    """
+    # name of the features this function can compute
+    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
+    # the subset of requested features this function can compute, kept in the
+    # order above so empty and non-empty outputs share one column order
+    features_to_compute = [x for x in base_features_names if x in requested_features]
+    prefix = "light_" + day_segment + "_"
+
+    # Filter before the emptiness check: a segment with no rows used to leave
+    # light_features unassigned and the return raised UnboundLocalError.
+    if not light_data.empty and day_segment != "daily":
+        light_data = light_data[light_data["local_day_segment"] == day_segment]
+    if light_data.empty:
+        return pd.DataFrame(columns=["local_date"] + [prefix + x for x in features_to_compute])
+
+    by_date = light_data.groupby(["local_date"])
+    # pandas aggregation behind each lux statistic
+    lux_aggregations = {"maxlux": "max", "minlux": "min", "avglux": "mean", "medianlux": "median", "stdlux": "std"}
+    light_features = pd.DataFrame()
+    for feature in features_to_compute:
+        if feature == "count":
+            light_features[prefix + feature] = by_date.count()["timestamp"]
+        else:
+            light_features[prefix + feature] = by_date["double_light_lux"].agg(lux_aggregations[feature])
+
+    return light_features.reset_index()
\ No newline at end of file
diff --git a/src/features/light_metrics.py b/src/features/light_metrics.py
index f6219178..b72b1829 100644
--- a/src/features/light_metrics.py
+++ b/src/features/light_metrics.py
@@ -1,32 +1,13 @@
 import pandas as pd
-import numpy as np
+from light.light_base import base_light_features
 
 light_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
 day_segment = snakemake.params["day_segment"]
 metrics = snakemake.params["metrics"]
+light_features = pd.DataFrame(columns=["local_date"])  # key-only frame; computed feature frames are outer-merged onto it below
 
-light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in metrics])
-if not light_data.empty:
-    if day_segment != "daily":
-        light_data =light_data[light_data["local_day_segment"] == day_segment]
-    
-    if not light_data.empty:
-        light_features = pd.DataFrame()
-        if "count" in metrics:
-            light_features["light_" + day_segment + "_count"] = light_data.groupby(["local_date"]).count()["timestamp"]
-        
-        # get light ambient luminance related features
-        if "maxlux" in metrics:
-            light_features["light_" + day_segment + "_maxlux"] = light_data.groupby(["local_date"])["double_light_lux"].max()
-        if "minlux" in metrics:
-            light_features["light_" + day_segment + "_minlux"] = light_data.groupby(["local_date"])["double_light_lux"].min()
-        if "avglux" in metrics:
-            light_features["light_" + day_segment + "_avglux"] = light_data.groupby(["local_date"])["double_light_lux"].mean()
-        if "medianlux" in metrics:
-            light_features["light_" + day_segment + "_medianlux"] = light_data.groupby(["local_date"])["double_light_lux"].median()
-        if "stdlux" in metrics:
-            light_features["light_" + day_segment + "_stdlux"] = light_data.groupby(["local_date"])["double_light_lux"].std()
-        
-        light_features = light_features.reset_index()
+light_features = light_features.merge(base_light_features(light_data, day_segment, metrics), on="local_date", how="outer")
+# sanity check: expect exactly one output column per requested metric, plus "local_date"
+assert len(metrics) + 1 == light_features.shape[1], "The number of features in the output dataframe (=" + str(light_features.shape[1]) + ") does not match the expected value (=" + str(len(metrics)) + " + 1). Verify your light feature extraction functions"
 
 light_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file