Migrate light to new file structure
parent
eda58d9d2c
commit
e643881058
10
Snakefile
10
Snakefile
|
@ -83,10 +83,12 @@ if config["SCREEN"]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/processed/{pid}/screen_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SCREEN"]["DAY_SEGMENTS"]))
|
||||||
|
|
||||||
if config["LIGHT"]["COMPUTE"]:
|
for provider in config["LIGHT"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
if config["LIGHT"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
||||||
files_to_compute.extend(expand("data/processed/{pid}/light_{day_segment}.csv", pid = config["PIDS"], day_segment = config["LIGHT"]["DAY_SEGMENTS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["LIGHT"]["DB_TABLE"]))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["LIGHT"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="LIGHT".lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="LIGHT".lower()))
|
||||||
|
|
||||||
if config["ACCELEROMETER"]["COMPUTE"]:
|
if config["ACCELEROMETER"]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=config["ACCELEROMETER"]["DB_TABLE"]))
|
||||||
|
|
10
config.yaml
10
config.yaml
|
@ -136,10 +136,14 @@ SCREEN:
|
||||||
EPISODE_TYPES: ["unlock"]
|
EPISODE_TYPES: ["unlock"]
|
||||||
|
|
||||||
LIGHT:
|
LIGHT:
|
||||||
COMPUTE: False
|
|
||||||
DB_TABLE: light
|
DB_TABLE: light
|
||||||
DAY_SEGMENTS: *day_segments
|
PROVIDERS:
|
||||||
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
|
RAPIDS:
|
||||||
|
COMPUTE: TRUE
|
||||||
|
FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
|
||||||
|
SRC_FOLDER: "rapids" # inside src/features/light
|
||||||
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
|
|
||||||
ACCELEROMETER:
|
ACCELEROMETER:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
|
|
@ -173,16 +173,29 @@ rule screen_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/screen_features.py"
|
"../src/features/screen_features.py"
|
||||||
|
|
||||||
rule light_features:
|
rule light_r_features:
|
||||||
input:
|
input:
|
||||||
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
|
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
|
||||||
|
day_segments_labels = "data/interim/day_segments_labels.csv"
|
||||||
params:
|
params:
|
||||||
day_segment = "{day_segment}",
|
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||||
features = config["LIGHT"]["FEATURES"],
|
provider_key = "{provider_key}"
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/light_{day_segment}.csv"
|
"data/interim/{pid}/light_features/light_r_{provider_key}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/light_features.py"
|
"../src/features/light/light_entry.R"
|
||||||
|
|
||||||
|
# Runs one Python light provider for one participant. The provider is selected
# by the {provider_key} wildcard, which indexes config["LIGHT"]["PROVIDERS"].
rule light_python_features:
    input:
        # expand() yields a list, so the script reads the first element
        sensor_data = expand(
            "data/raw/{{pid}}/{sensor}_with_datetime.csv",
            sensor=config["LIGHT"]["DB_TABLE"]
        ),
        day_segments_labels = "data/interim/day_segments_labels.csv"
    params:
        # provider settings are looked up lazily, once the wildcard is known
        provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
        provider_key = "{provider_key}"
    output:
        "data/interim/{pid}/light_features/light_python_{provider_key}.csv"
    script:
        "../src/features/light/light_entry.py"
|
||||||
|
|
||||||
rule conversation_features:
|
rule conversation_features:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
def base_light_features(light_data, day_segment, requested_features):
    """Compute per-day ambient light features for one day segment.

    Args:
        light_data: DataFrame of light sensor rows. The columns read here are
            "local_date", "timestamp" and "double_light_lux", plus
            "local_day_segment" when ``day_segment`` is not "daily".
        day_segment: segment label to keep; "daily" keeps every row.
        requested_features: iterable of feature names; names this function
            cannot compute are ignored.

    Returns:
        DataFrame with a "local_date" column followed by one
        "light_<day_segment>_<feature>" column per computed feature. Feature
        columns always follow the order of ``base_features_names``, so the
        header is the same for empty and non-empty results and across runs.
        The frame has no rows when there is no data for the segment or no
        supported feature was requested.
    """
    # name of the features this function can compute
    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
    # the subset of requested features this function can compute, kept in a
    # fixed order (a plain set intersection has no stable ordering)
    requested = set(requested_features)
    features_to_compute = [name for name in base_features_names if name in requested]

    prefix = "light_" + day_segment + "_"
    no_features = pd.DataFrame(columns=["local_date"] + [prefix + name for name in features_to_compute])

    if light_data.empty:
        return no_features
    if day_segment != "daily":
        light_data = light_data[light_data["local_day_segment"] == day_segment]
    if light_data.empty or not features_to_compute:
        return no_features

    # group once and reuse the grouping for every feature
    per_day = light_data.groupby(["local_date"])
    lux = per_day["double_light_lux"]
    aggregators = {
        "count": lambda: per_day["timestamp"].count(),
        "maxlux": lux.max,
        "minlux": lux.min,
        "avglux": lux.mean,
        "medianlux": lux.median,
        # an undefined standard deviation (NaN) is reported as the string 'NA'
        "stdlux": lambda: lux.std().fillna('NA'),
    }
    columns = {prefix + name: aggregators[name]() for name in features_to_compute}

    # every series shares the "local_date" index; turn it into a column
    return pd.DataFrame(columns).reset_index()
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
source("renv/activate.R")
|
||||||
|
source("src/features/utils/utils.R")
|
||||||
|
library("dplyr")
|
||||||
|
library("tidyr")
|
||||||
|
|
||||||
|
# Inputs and parameters handed over by the Snakemake rule
rule_inputs <- snakemake@input
rule_params <- snakemake@params

sensor_data_file <- rule_inputs[["sensor_data"]]
day_segments_file <- rule_inputs[["day_segments_labels"]]
provider <- rule_params[["provider"]]
# single-bracket subset on purpose: provider_key stays a one-element list
provider_key <- rule_params["provider_key"]

# Compute the features of this provider for the "light" sensor
sensor_features <- fetch_provider_features(
  provider,
  provider_key,
  "light",
  sensor_data_file,
  day_segments_file
)

# Save the features to the rule's first output file
write.csv(sensor_features, file = snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,18 @@
|
||||||
|
import pandas as pd
|
||||||
|
from importlib import import_module, util
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Load src/features/utils/utils.py directly from its file path (one folder up
# from this script's directory) and take fetch_provider_features from it.
utils_path = Path(snakemake.scriptdir).parent / "utils" / "utils.py"
utils_spec = util.spec_from_file_location("util", str(utils_path))
utils_module = util.module_from_spec(utils_spec)
utils_spec.loader.exec_module(utils_module)
fetch_provider_features = utils_module.fetch_provider_features

# Inputs and parameters handed over by the Snakemake rule.
rule_inputs = snakemake.input
rule_params = snakemake.params
sensor_data_file = rule_inputs["sensor_data"][0]  # sensor_data is a list; use its first file
day_segments_file = rule_inputs["day_segments_labels"]
provider = rule_params["provider"]
provider_key = rule_params["provider_key"]

# Compute the features of this provider for the "light" sensor and save them.
sensor_features = fetch_provider_features(
    provider,
    provider_key,
    "light",
    sensor_data_file,
    day_segments_file,
)
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -0,0 +1,34 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def rapids_features(light_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute the RAPIDS ambient light features for one day segment.

    Args:
        light_data: DataFrame of light sensor rows. After filtering, the
            columns read here are "local_segment", "timestamp" and
            "double_light_lux".
        day_segment: segment label, passed through to
            ``filter_data_by_segment``.
        provider: provider configuration; its "FEATURES" entry lists the
            requested feature names. Names this function cannot compute are
            ignored.
        filter_data_by_segment: callable ``(data, day_segment)`` returning
            the rows of ``data`` to aggregate for that segment.
        *args, **kwargs: accepted and ignored.

    Returns:
        DataFrame with a "local_segment" column followed by one
        "light_rapids_<feature>" column per computed feature. Feature columns
        always follow the order of ``base_features_names``, so the header is
        the same for empty and non-empty results and across runs. The frame
        has no rows when there is no data for the segment or no supported
        feature was requested.
    """
    requested_features = provider["FEATURES"]
    # name of the features this function can compute
    base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
    # the subset of requested features this function can compute, kept in a
    # fixed order (a plain set intersection has no stable ordering)
    requested = set(requested_features)
    features_to_compute = [name for name in base_features_names if name in requested]

    prefix = "light_rapids_"
    no_features = pd.DataFrame(columns=["local_segment"] + [prefix + name for name in features_to_compute])

    if light_data.empty:
        return no_features
    light_data = filter_data_by_segment(light_data, day_segment)
    if light_data.empty or not features_to_compute:
        return no_features

    # group once and reuse the grouping for every feature
    per_segment = light_data.groupby(["local_segment"])
    lux = per_segment["double_light_lux"]
    aggregators = {
        "count": lambda: per_segment["timestamp"].count(),
        "maxlux": lux.max,
        "minlux": lux.min,
        "avglux": lux.mean,
        "medianlux": lux.median,
        # an undefined standard deviation (NaN) is reported as the string 'NA'
        "stdlux": lambda: lux.std().fillna('NA'),
    }
    columns = {prefix + name: aggregators[name]() for name in features_to_compute}

    # every series shares the "local_segment" index; turn it into a column
    return pd.DataFrame(columns).reset_index()
|
Loading…
Reference in New Issue