Add statistical features for the Empatica accelerometer

feature/plugin_sentimental
JulioV 2021-02-02 17:19:10 -05:00
parent 5f5f19866f
commit 3bb0230bac
3 changed files with 31 additions and 16 deletions

View File

@ -294,12 +294,12 @@ for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
suffixes = get_zip_suffixes(pid) suffixes = get_zip_suffixes(pid)
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_unzipped_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_unzipped_{suffix}.csv", pid=pid, suffix=suffixes))
files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw_{suffix}.csv", pid=pid, suffix=suffixes)) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_raw_{suffix}.csv", pid=pid, suffix=suffixes))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_joined.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/empatica_accelerometer_with_datetime.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/empatica_accelerometer_features/empatica_accelerometer_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
# files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/empatica_accelerometer.csv", pid=config["PIDS"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv") files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys(): for provider in config["EMPATICA_HEARTRATE"]["PROVIDERS"].keys():
if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]: if config["EMPATICA_HEARTRATE"]["PROVIDERS"][provider]["COMPUTE"]:

View File

@ -427,8 +427,8 @@ EMPATICA_ACCELEROMETER:
TABLE: acc TABLE: acc
PROVIDERS: PROVIDERS:
DBDP: DBDP:
COMPUTE: True COMPUTE: False
FEATURES: [] FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
SRC_FOLDER: "dbdp" # inside src/features/empatica_accelerometer SRC_FOLDER: "dbdp" # inside src/features/empatica_accelerometer
SRC_LANGUAGE: "python" SRC_LANGUAGE: "python"

View File

@ -3,19 +3,34 @@ import numpy as np
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute per-segment statistics of the accelerometer magnitude.

    The magnitude of every sample is ``sqrt(x^2 + y^2 + z^2)``; the requested
    statistics of that magnitude are then computed for each ``local_segment``.

    Args:
        sensor_data_files: Mapping whose ``"sensor_data"`` entry is the path of
            the accelerometer CSV. The CSV must provide ``x``, ``y`` and ``z``
            columns.
        time_segment: Passed through unchanged to ``filter_data_by_segment``.
        provider: Provider configuration; ``provider["FEATURES"]`` lists the
            requested feature names. Names this function does not know are
            ignored.
        filter_data_by_segment: Callable invoked as
            ``filter_data_by_segment(data, time_segment)``. The frame it
            returns must contain a ``local_segment`` column, which is used as
            the grouping key.
        *args: Unused; accepted for interface compatibility.
        **kwargs: Unused; accepted for interface compatibility.

    Returns:
        A DataFrame with a ``local_segment`` column followed by one column per
        computed feature, in the fixed order max, min, avg, median, std.
        It has zero rows (but the same columns) when there is no data for the
        segment or when none of the requested features is supported.
    """
    # Feature name -> pandas aggregation applied to the per-sample magnitude.
    # The insertion order of this dict defines the output column order.
    magnitude_aggregations = {
        "maxmagnitude": "max",
        "minmagnitude": "min",
        "avgmagnitude": "mean",
        "medianmagnitude": "median",
        "stdmagnitude": "std",
    }

    # Keep only the supported features, in a deterministic order (a plain set
    # intersection would make the column order vary between runs).
    requested_features = set(provider["FEATURES"])
    features_to_compute = [name for name in magnitude_aggregations if name in requested_features]

    # Returned whenever nothing can be computed, so callers always receive the
    # same schema, including the "local_segment" key column.
    empty_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)

    acc_data = pd.read_csv(sensor_data_files["sensor_data"])
    if acc_data.empty:
        return empty_features

    acc_data = filter_data_by_segment(acc_data, time_segment)
    if acc_data.empty or not features_to_compute:
        return empty_features

    # Vectorised magnitude: one NumPy expression instead of a Python-level
    # row-by-row apply. Working on the raw array also sidesteps any index
    # alignment issues if the filtered frame has repeated index labels.
    axes = acc_data[["x", "y", "z"]].to_numpy(dtype=float)
    acc_data = acc_data.assign(magnitude=np.sqrt((axes ** 2).sum(axis=1)))

    # Group once and compute every requested statistic in a single pass.
    magnitude_by_segment = acc_data.groupby("local_segment")["magnitude"]
    acc_features = magnitude_by_segment.agg(
        **{name: magnitude_aggregations[name] for name in features_to_compute}
    )

    # Turn the "local_segment" group index back into a regular column.
    return acc_features.reset_index()