Add ambient light sensor features to the pipeline.

What this patch carries:

  * Snakefile
      `rule all` additionally requests
      data/processed/{pid}/light_{day_segment}.csv, expanded over
      config["PIDS"] and config["LIGHT"]["DAY_SEGMENTS"].

  * config.yaml
      Adds the trailing newline that was missing after SCREEN.EPISODES and
      appends a LIGHT section with DAY_SEGMENTS (the shared *day_segments
      alias) and METRICS (count, maxlux, minlux, avglux, medianlux, stdlux).

  * rules/features.snakefile
      New rule `light_metrics`: reads data/raw/{pid}/light_with_datetime.csv,
      passes the `day_segment` wildcard and config["LIGHT"]["METRICS"] as
      params, and runs src/features/light_metrics.py to produce
      data/processed/{pid}/light_{day_segment}.csv.

  * src/features/light_metrics.py (new file)
      Reads snakemake.input[0] with local_date_time and local_date parsed as
      dates.  Unless day_segment is "daily", only rows whose
      local_day_segment equals day_segment are kept.  The remaining rows are
      grouped by local_date; "count" is taken from the timestamp column and
      the *lux metrics (max, min, mean, median, std) from double_light_lux.
      Only metrics listed in the `metrics` param are computed.  Missing
      values are filled with 0 before writing.  When the input, or the
      filtered selection, is empty, a header-only CSV is written with
      local_date plus one light_<day_segment>_<metric> column per requested
      metric.

Notes for review:

  * This copy was rebuilt from a version whose line breaks and leading
    whitespace had been collapsed.  Line counts match every hunk header,
    but the indentation shown here (context lines in particular) is a
    reconstruction and should be confirmed against the repository; a
    whitespace-tolerant apply may be needed.
  * numpy is imported by the new script but not referenced anywhere in it.
  * Output column order follows the order of the `if` checks in the script,
    not the order of the METRICS list.

diff --git a/Snakefile b/Snakefile
index 90e8ed2a..ffe5d6ec 100644
--- a/Snakefile
+++ b/Snakefile
@@ -33,6 +33,9 @@ rule all:
         expand("data/processed/{pid}/screen_{day_segment}.csv",
                             pid = config["PIDS"],
                             day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
+        expand("data/processed/{pid}/light_{day_segment}.csv",
+                            pid = config["PIDS"],
+                            day_segment = config["LIGHT"]["DAY_SEGMENTS"]),
         # Reports
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"]),
diff --git a/config.yaml b/config.yaml
index ee89d16f..19e6d2cf 100644
--- a/config.yaml
+++ b/config.yaml
@@ -72,4 +72,8 @@ SCREEN:
   DAY_SEGMENTS: *day_segments
   METRICS_EVENTS: ["counton", "countunlock", "unlocksperminute"]
   METRICS_DELTAS: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
-  EPISODES: ["unlock"]
\ No newline at end of file
+  EPISODES: ["unlock"]
+
+LIGHT:
+  DAY_SEGMENTS: *day_segments
+  METRICS: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
diff --git a/rules/features.snakefile b/rules/features.snakefile
index cfbde01c..ca635d2e 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -108,3 +108,14 @@ rule screen_metrics:
         "data/processed/{pid}/screen_{day_segment}.csv"
     script:
         "../src/features/screen_metrics.py"
+
+rule light_metrics:
+    input:
+        "data/raw/{pid}/light_with_datetime.csv",
+    params:
+        day_segment = "{day_segment}",
+        metrics = config["LIGHT"]["METRICS"],
+    output:
+        "data/processed/{pid}/light_{day_segment}.csv"
+    script:
+        "../src/features/light_metrics.py"
diff --git a/src/features/light_metrics.py b/src/features/light_metrics.py
new file mode 100644
index 00000000..3b457e7d
--- /dev/null
+++ b/src/features/light_metrics.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import numpy as np
+
+light_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
+day_segment = snakemake.params["day_segment"]
+metrics = snakemake.params["metrics"]
+
+light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in metrics])
+if not light_data.empty:
+    if day_segment != "daily":
+        light_data =light_data[light_data["local_day_segment"] == day_segment]
+
+    if not light_data.empty:
+        light_features = pd.DataFrame()
+        if "count" in metrics:
+            light_features["light_" + day_segment + "_count"] = light_data.groupby(["local_date"]).count()["timestamp"]
+
+        # get light ambient luminance related features
+        if "maxlux" in metrics:
+            light_features["light_" + day_segment + "_maxlux"] = light_data.groupby(["local_date"])["double_light_lux"].max()
+        if "minlux" in metrics:
+            light_features["light_" + day_segment + "_minlux"] = light_data.groupby(["local_date"])["double_light_lux"].min()
+        if "avglux" in metrics:
+            light_features["light_" + day_segment + "_avglux"] = light_data.groupby(["local_date"])["double_light_lux"].mean()
+        if "medianlux" in metrics:
+            light_features["light_" + day_segment + "_medianlux"] = light_data.groupby(["local_date"])["double_light_lux"].median()
+        if "stdlux" in metrics:
+            light_features["light_" + day_segment + "_stdlux"] = light_data.groupby(["local_date"])["double_light_lux"].std()
+
+        light_features = light_features.fillna(0).reset_index()
+
+light_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file