From 1ce76a5380049338a2d705b5276435e2e15b8ea7 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Wed, 1 Apr 2020 18:29:53 -0400
Subject: [PATCH] Refactor light features

---
Review note: base_light_features() now returns the empty, correctly labelled
frame when the day segment filter removes every row. Before this change
light_features was left unassigned on that path and the final return raised
UnboundLocalError. Feature columns are also selected in a fixed order. The
blob id on the light_base.py index line predates this revision.

 src/features/light/light_base.py | 54 ++++++++++++++++++++++++++++++++
 src/features/light_metrics.py    | 29 +++--------------
 2 files changed, 59 insertions(+), 24 deletions(-)
 create mode 100644 src/features/light/light_base.py

diff --git a/src/features/light/light_base.py b/src/features/light/light_base.py
new file mode 100644
index 00000000..e00843fb
--- /dev/null
+++ b/src/features/light/light_base.py
@@ -0,0 +1,54 @@
+import pandas as pd
+
+def base_light_features(light_data, day_segment, requested_features):
+    """Compute per-day ambient light features for one day segment.
+
+    Args:
+        light_data: DataFrame of light sensor rows. The columns read here are
+            "local_date", "timestamp", "double_light_lux" and, unless
+            day_segment is "daily", "local_day_segment".
+        day_segment: "daily" to use every row, otherwise the
+            "local_day_segment" value whose rows are kept.
+        requested_features: names of the features the caller wants. Names
+            this function cannot compute are ignored.
+
+    Returns:
+        DataFrame with a "local_date" column plus one
+        "light_<day_segment>_<feature>" column per computable feature. It
+        has no rows when there is no data for the requested day segment.
+    """
+    # name of the features this function can compute
+    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
+    # the subset of requested features this function can compute, kept in
+    # the order above so the output columns are deterministic
+    features_to_compute = [x for x in base_features_names if x in requested_features]
+
+    if day_segment != "daily" and not light_data.empty:
+        light_data = light_data[light_data["local_day_segment"] == day_segment]
+
+    # Nothing to aggregate (no rows at all, no rows left in this day segment,
+    # or no computable feature requested): return the expected columns
+    # without rows instead of leaving light_features undefined
+    if light_data.empty or not features_to_compute:
+        return pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute])
+
+    rows_by_date = light_data.groupby(["local_date"])
+    lux_by_date = rows_by_date["double_light_lux"]
+
+    light_features = pd.DataFrame()
+    if "count" in features_to_compute:
+        light_features["light_" + day_segment + "_count"] = rows_by_date.count()["timestamp"]
+
+    # get light ambient luminance related features
+    if "maxlux" in features_to_compute:
+        light_features["light_" + day_segment + "_maxlux"] = lux_by_date.max()
+    if "minlux" in features_to_compute:
+        light_features["light_" + day_segment + "_minlux"] = lux_by_date.min()
+    if "avglux" in features_to_compute:
+        light_features["light_" + day_segment + "_avglux"] = lux_by_date.mean()
+    if "medianlux" in features_to_compute:
+        light_features["light_" + day_segment + "_medianlux"] = lux_by_date.median()
+    if "stdlux" in features_to_compute:
+        light_features["light_" + day_segment + "_stdlux"] = lux_by_date.std()
+
+    return light_features.reset_index()
\ No newline at end of file
diff --git a/src/features/light_metrics.py b/src/features/light_metrics.py
index f6219178..b72b1829 100644
--- a/src/features/light_metrics.py
+++ b/src/features/light_metrics.py
@@ -1,32 +1,13 @@
 import pandas as pd
-import numpy as np
+from light.light_base import base_light_features
 
 light_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
 day_segment = snakemake.params["day_segment"]
 metrics = snakemake.params["metrics"]
+light_features = pd.DataFrame(columns=["local_date"])
 
-light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in metrics])
-if not light_data.empty:
-    if day_segment != "daily":
-        light_data =light_data[light_data["local_day_segment"] == day_segment]
-
-    if not light_data.empty:
-        light_features = pd.DataFrame()
-        if "count" in metrics:
-            light_features["light_" + day_segment + "_count"] = light_data.groupby(["local_date"]).count()["timestamp"]
-
-        # get light ambient luminance related features
-        if "maxlux" in metrics:
-            light_features["light_" + day_segment + "_maxlux"] = light_data.groupby(["local_date"])["double_light_lux"].max()
-        if "minlux" in metrics:
-            light_features["light_" + day_segment + "_minlux"] = light_data.groupby(["local_date"])["double_light_lux"].min()
-        if "avglux" in metrics:
-            light_features["light_" + day_segment + "_avglux"] = light_data.groupby(["local_date"])["double_light_lux"].mean()
-        if "medianlux" in metrics:
-            light_features["light_" + day_segment + "_medianlux"] = light_data.groupby(["local_date"])["double_light_lux"].median()
-        if "stdlux" in metrics:
-            light_features["light_" + day_segment + "_stdlux"] = light_data.groupby(["local_date"])["double_light_lux"].std()
-
-        light_features = light_features.reset_index()
+light_features = light_features.merge(base_light_features(light_data, day_segment, metrics), on="local_date", how="outer")
+
+assert len(metrics) + 1 == light_features.shape[1], "The number of features in the output dataframe (=" + str(light_features.shape[1]) + ") does not match the expected value (=" + str(len(metrics)) + " + 1). Verify your light feature extraction functions"
 
 light_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file