2020-12-15 02:30:34 +01:00
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
def dbdp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute per-segment accelerometer magnitude features.

    The magnitude of every sample is ``sqrt(x^2 + y^2 + z^2)``; the requested
    statistics of that magnitude are then aggregated per ``local_segment``.

    Parameters
    ----------
    sensor_data_files : mapping
        ``sensor_data_files["sensor_data"]`` is handed to ``pandas.read_csv``.
    time_segment
        Passed through unchanged to ``filter_data_by_segment``.
    provider : mapping
        ``provider["FEATURES"]`` is the collection of requested feature names.
        Names this function does not know are ignored.
    filter_data_by_segment : callable
        Called as ``filter_data_by_segment(acc_data, time_segment)``. Its
        result must contain the columns ``local_segment``, ``x``, ``y``, ``z``
        whenever it is not empty.
    *args, **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        One row per ``local_segment`` with a ``local_segment`` column followed
        by the computed feature columns, always in the canonical order
        max, min, avg, median, std. The frame has the same columns but no
        rows when there is no data, or when no supported feature is requested.
    """
    acc_data = pd.read_csv(sensor_data_files["sensor_data"])

    # Features this function can compute, mapped to the groupby aggregation
    # that produces each one. Dict order defines the output column order.
    aggregations = {
        "maxmagnitude": "max",
        "minmagnitude": "min",
        "avgmagnitude": "mean",
        "medianmagnitude": "median",
        "stdmagnitude": "std",
    }

    # Subset of requested features this function can compute. Filtering the
    # canonical list (instead of intersecting two sets) keeps the column
    # order deterministic for both the empty and the non-empty result.
    requested = set(provider["FEATURES"])
    features_to_compute = [name for name in aggregations if name in requested]

    empty_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)

    # Nothing to read, or nothing to compute: keep the documented schema
    # rather than returning a frame without a ``local_segment`` column.
    if acc_data.empty or not features_to_compute:
        return empty_features

    acc_data = filter_data_by_segment(acc_data, time_segment)
    if acc_data.empty:
        return empty_features

    # Vectorized magnitude = sqrt(x^2 + y^2 + z^2); avoids a Python-level
    # call per row.
    magnitude = np.sqrt(acc_data["x"] ** 2 + acc_data["y"] ** 2 + acc_data["z"] ** 2)
    acc_data = acc_data.assign(magnitude=magnitude.values)

    # Group once and reuse the grouped series for every requested statistic.
    grouped = acc_data.groupby(["local_segment"])["magnitude"]
    acc_features = pd.DataFrame(
        {name: getattr(grouped, aggregations[name])() for name in features_to_compute}
    )

    # Turn the ``local_segment`` group index back into a regular column.
    return acc_features.reset_index()
|