Refactor activity recognition features
parent
ff1bc33c34
commit
aa1baaf948
|
@ -81,7 +81,7 @@ BLUETOOTH:
|
||||||
|
|
||||||
ACTIVITY_RECOGNITION:
|
ACTIVITY_RECOGNITION:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
FEATURES: ['count','mostcommonactivity','countuniqueactivities','activitychangecount','sumstationary','summobile','sumvehicle']
|
FEATURES: ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
|
||||||
|
|
||||||
BATTERY:
|
BATTERY:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
|
|
|
@ -1,63 +1,15 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
from ar.ar_base import base_ar_features
|
||||||
import scipy.stats as stats
|
|
||||||
from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
|
|
||||||
|
|
||||||
day_segment = snakemake.params["segment"]
|
ar_data = pd.read_csv(snakemake.input[0],parse_dates=["local_date_time"])
|
||||||
features = snakemake.params["features"]
|
|
||||||
|
|
||||||
#Read csv into a pandas dataframe
|
|
||||||
data = pd.read_csv(snakemake.input[0],parse_dates=["local_date_time"])
|
|
||||||
ar_deltas = pd.read_csv(snakemake.input[1],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
|
ar_deltas = pd.read_csv(snakemake.input[1],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
|
||||||
columns = list("ar_" + str(day_segment) + "_" + column for column in features)
|
day_segment = snakemake.params["segment"]
|
||||||
|
requested_features = snakemake.params["features"]
|
||||||
if data.empty:
|
ar_features = pd.DataFrame(columns=["local_date"])
|
||||||
finalDataset = pd.DataFrame(columns = columns)
|
|
||||||
else:
|
|
||||||
finalDataset = pd.DataFrame()
|
|
||||||
ar_deltas = splitOvernightEpisodes(ar_deltas, [],['activity'])
|
|
||||||
|
|
||||||
if day_segment != "daily":
|
|
||||||
ar_deltas = splitMultiSegmentEpisodes(ar_deltas, day_segment, [])
|
|
||||||
|
|
||||||
data.local_date_time = pd.to_datetime(data.local_date_time)
|
|
||||||
resampledData = data.set_index(data.local_date_time)
|
|
||||||
resampledData.drop(columns=['local_date_time'],inplace=True)
|
|
||||||
|
|
||||||
if(day_segment!='daily'):
|
|
||||||
resampledData = resampledData.loc[resampledData['local_day_segment'] == str(day_segment)]
|
|
||||||
|
|
||||||
if resampledData.empty:
|
|
||||||
finalDataset = pd.DataFrame(columns = columns)
|
|
||||||
else:
|
|
||||||
#Finding the count of samples of the day
|
|
||||||
if("count" in features):
|
|
||||||
finalDataset["ar_" + str(day_segment) + "_count"] = resampledData['activity_type'].resample('D').count()
|
|
||||||
|
|
||||||
#Finding most common activity of the day
|
|
||||||
if("mostcommonactivity" in features):
|
|
||||||
finalDataset["ar_" + str(day_segment) + "_mostcommonactivity"] = resampledData['activity_type'].resample('D').apply(lambda x: stats.mode(x)[0] if len(stats.mode(x)[0]) != 0 else None)
|
|
||||||
|
|
||||||
#finding different number of activities during a day
|
|
||||||
if("countuniqueactivities" in features):
|
|
||||||
finalDataset["ar_" + str(day_segment) + "_countuniqueactivities"] = resampledData['activity_type'].resample('D').nunique()
|
|
||||||
|
|
||||||
#finding Number of times activity changed
|
|
||||||
if("activitychangecount" in features):
|
|
||||||
resampledData['activity_type_shift'] = resampledData['activity_type'].shift().fillna(resampledData['activity_type'].head(1))
|
|
||||||
resampledData['different_activity'] = np.where(resampledData['activity_type']!=resampledData['activity_type_shift'],1,0)
|
|
||||||
finalDataset["ar_" + str(day_segment) + "_activitychangecount"] = resampledData['different_activity'].resample('D').sum()
|
|
||||||
|
|
||||||
|
|
||||||
deltas_features = {'sumstationary':['still','tilting'],
|
ar_features = ar_features.merge(base_ar_features(ar_data, ar_deltas, day_segment, requested_features), on="local_date", how="outer")
|
||||||
'summobile':['on_foot','walking','running','on_bicycle'],
|
|
||||||
'sumvehicle':['in_vehicle']}
|
|
||||||
|
|
||||||
for column, activity_labels in deltas_features.items():
|
assert len(requested_features) + 1 == ar_features.shape[1], "The number of features in the output dataframe (=" + str(ar_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your activity recognition feature extraction functions"
|
||||||
if column in features:
|
|
||||||
finalDataset["ar_" + str(day_segment) + "_"+str(column)] = (ar_deltas[ar_deltas['activity'].isin(pd.Series(activity_labels))]
|
|
||||||
.groupby(['local_start_date'])['time_diff']
|
|
||||||
.agg({"ar_" + str(day_segment) + "_" + str(column) :'sum'}))
|
|
||||||
|
|
||||||
finalDataset.index.names = ['local_date']
|
ar_features.to_csv(snakemake.output[0], index=False)
|
||||||
finalDataset.to_csv(snakemake.output[0])
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import scipy.stats as stats
|
||||||
|
from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
|
||||||
|
|
||||||
|
def _most_common_activity(values):
    """Return what ``scipy.stats.mode`` reports for ``values``, or None when it is empty."""
    # Evaluate the mode once per group instead of once for the length check
    # and once more for the returned value.
    modes = stats.mode(values)[0]
    return modes if len(modes) != 0 else None


def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
    """Compute the per-day activity recognition features for one day segment.

    Parameters
    ----------
    ar_data : pandas.DataFrame
        Activity samples. The columns read here are ``local_date_time``,
        ``activity_type`` and, when ``day_segment`` is not ``"daily"``,
        ``local_day_segment``. The frame is not modified.
    ar_deltas : pandas.DataFrame
        Activity episodes. The columns read here are ``activity``,
        ``local_start_date`` and ``time_diff``; it is first passed through
        ``splitOvernightEpisodes`` / ``splitMultiSegmentEpisodes``
        (NOTE(review): presumably these also rely on the episode start/end
        timestamp columns -- confirm in ``features_utils``).
    day_segment : str
        ``"daily"`` or the name of a segment found in ``local_day_segment``.
    requested_features : iterable of str
        Feature names requested by the caller; names this function does not
        know are ignored.

    Returns
    -------
    pandas.DataFrame
        A ``local_date`` column followed by one ``ar_<day_segment>_<feature>``
        column per requested feature this function can compute.
    """
    # name of the features this function can compute
    base_features_names = ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
    # the subset of requested features this function can compute; iterating the
    # base list (rather than intersecting two sets) keeps the column order
    # deterministic and identical to the order the non-empty branch produces
    requested = set(requested_features)
    features_to_compute = [name for name in base_features_names if name in requested]
    prefix = "ar_" + day_segment + "_"

    if ar_data.empty:
        return pd.DataFrame(columns=["local_date"] + [prefix + name for name in features_to_compute])

    ar_deltas = splitOvernightEpisodes(ar_deltas, [], ["activity"])
    if day_segment != "daily":
        ar_deltas = splitMultiSegmentEpisodes(ar_deltas, day_segment, [])

    # Index the samples by timestamp so they can be resampled per day. Work on
    # a copy (``assign``) so the caller's frame is left untouched.
    timestamps = pd.to_datetime(ar_data["local_date_time"])
    resampled = ar_data.assign(local_date_time=timestamps).set_index("local_date_time")

    if day_segment != "daily":
        resampled = resampled.loc[resampled["local_day_segment"] == day_segment]

    if resampled.empty:
        ar_features = pd.DataFrame(columns=[prefix + name for name in features_to_compute])
    else:
        ar_features = pd.DataFrame()
        activity = resampled["activity_type"]

        # Finding the count of samples of the day
        if "count" in features_to_compute:
            ar_features[prefix + "count"] = activity.resample("D").count()

        # Finding most common activity of the day
        if "mostcommonactivity" in features_to_compute:
            ar_features[prefix + "mostcommonactivity"] = activity.resample("D").apply(_most_common_activity)

        # Finding the number of different activities during a day
        if "countuniqueactivities" in features_to_compute:
            ar_features[prefix + "countuniqueactivities"] = activity.resample("D").nunique()

        # Finding the number of times the activity changed
        if "activitychangecount" in features_to_compute:
            # The first sample has no predecessor; fill it with its own value
            # so it never counts as a change.
            previous = activity.shift().fillna(activity.head(1))
            # Kept as a standalone Series instead of new columns on
            # ``resampled``, which may be a slice of the input frame.
            changed = pd.Series(np.where(activity != previous, 1, 0), index=activity.index)
            ar_features[prefix + "activitychangecount"] = changed.resample("D").sum()

        # Activity labels whose episode durations are summed per start date
        deltas_features = {"sumstationary":["still","tilting"],
                           "summobile":["on_foot","walking","running","on_bicycle"],
                           "sumvehicle":["in_vehicle"]}

        for column, activity_labels in deltas_features.items():
            if column in features_to_compute:
                matching = ar_deltas[ar_deltas["activity"].isin(activity_labels)]
                ar_features[prefix + column] = matching.groupby(["local_start_date"])["time_diff"].sum()

    # Expose the per-day index as a regular ``local_date`` column
    ar_features.index.names = ["local_date"]
    ar_features = ar_features.reset_index()

    return ar_features
|
Loading…
Reference in New Issue