Fix a bug in the feature extraction code for some sensors: it broke when there was no data for an epoch
parent e66ed6a79b
commit 17f41588d8
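
The same change is applied to every sensor below: build the feature DataFrame with its expected columns up front, and only compute features when the epoch actually has data. A minimal sketch of that pattern, using a hypothetical generic base_sensor_features (the sensor_ prefix and argument names are placeholders, not code from this commit):

import pandas as pd

def base_sensor_features(sensor_data, day_segment, features_to_compute):
    # Start from an empty frame that already has the expected columns, so an
    # epoch with no data returns a valid (empty) result instead of crashing.
    sensor_features = pd.DataFrame(columns=["local_date"] + ["sensor_" + day_segment + "_" + x for x in features_to_compute])

    if not sensor_data.empty:
        if day_segment != "daily":
            sensor_data = sensor_data[sensor_data["local_day_segment"] == day_segment]

        # the day-segment filter may leave no rows, so check again
        if not sensor_data.empty:
            sensor_features = pd.DataFrame()
            # ... compute the requested features per local_date here ...

    return sensor_features

Each hunk below applies this restructuring to one sensor's base feature function.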
@@ -70,14 +70,12 @@ def base_accelerometer_features(acc_data, day_segment, requested_features, valid
     features_to_compute = features_to_compute_magnitude + features_to_compute_exertionalactivityepisode + features_to_compute_nonexertionalactivityepisode + (["validsensedminutes"] if valid_sensed_minutes else [])
 
-    if acc_data.empty:
-        acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
-    else:
+    acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
+    if not acc_data.empty:
         if day_segment != "daily":
             acc_data = acc_data[acc_data["local_day_segment"] == day_segment]
-        if acc_data.empty:
-            acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
-        else:
+
+        if not acc_data.empty:
             acc_features = pd.DataFrame()
             # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2)
             magnitude = acc_data.apply(lambda row: np.sqrt(row["double_values_0"] ** 2 + row["double_values_1"] ** 2 + row["double_values_2"] ** 2), axis=1)
 

@@ -28,9 +28,8 @@ def base_applications_foreground_features(apps_data, day_segment, requested_feat
     # deep copy the apps_data for the top1global computation
     apps_data_global = apps_data.copy()
 
-    if apps_data.empty:
-        apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + apps)]])
-    else:
+    apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + apps)]])
+    if not apps_data.empty:
         if day_segment != "daily":
             apps_data = apps_data[apps_data["local_day_segment"] == day_segment]
 

@@ -9,10 +9,8 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
     # the subset of requested features this function can compute
     features_to_compute = list(set(requested_features) & set(base_features_names))
 
-    if ar_data.empty:
-        ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
-    else:
-        ar_features = pd.DataFrame()
+    ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
+    if not ar_data.empty:
         ar_deltas = splitOvernightEpisodes(ar_deltas, [],["activity"])
 
         if day_segment != "daily":

@@ -22,25 +20,25 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
         resampledData = ar_data.set_index(ar_data.local_date_time)
         resampledData.drop(columns=["local_date_time"], inplace=True)
 
-        if(day_segment!="daily"):
+        if day_segment != "daily":
             resampledData = resampledData.loc[resampledData["local_day_segment"] == day_segment]
 
-        if resampledData.empty:
-            ar_features = pd.DataFrame(columns = ["ar_" + day_segment + "_" + x for x in features_to_compute])
-        else:
-            #Finding the count of samples of the day
+        if not resampledData.empty:
+            ar_features = pd.DataFrame()
+
+            # finding the count of samples of the day
             if "count" in features_to_compute:
                 ar_features["ar_" + day_segment + "_count"] = resampledData["activity_type"].resample("D").count()
 
-            #Finding most common activity of the day
+            # finding most common activity of the day
             if "mostcommonactivity" in features_to_compute:
                 ar_features["ar_" + day_segment + "_mostcommonactivity"] = resampledData["activity_type"].resample("D").apply(lambda x: stats.mode(x)[0] if len(stats.mode(x)[0]) != 0 else None)
 
-            #finding different number of activities during a day
+            # finding different number of activities during a day
             if "countuniqueactivities" in features_to_compute:
                 ar_features["ar_" + day_segment + "_countuniqueactivities"] = resampledData["activity_type"].resample("D").nunique()
 
-            #finding Number of times activity changed
+            # finding Number of times activity changed
             if "activitychangecount" in features_to_compute:
                 resampledData["activity_type_shift"] = resampledData["activity_type"].shift().fillna(resampledData["activity_type"].head(1))
                 resampledData["different_activity"] = np.where(resampledData["activity_type"]!=resampledData["activity_type_shift"],1,0)

@@ -55,7 +53,7 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
                 if column in features_to_compute:
                     ar_features["ar_" + day_segment + "_" + column] = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))].groupby(["local_start_date"])["time_diff"].sum()
 
-        ar_features.index.names = ["local_date"]
-        ar_features = ar_features.reset_index()
+            ar_features.index.names = ["local_date"]
+            ar_features = ar_features.reset_index()
 
     return ar_features

@@ -9,40 +9,40 @@ def base_battery_features(battery_data, day_segment, requested_features):
     # the subset of requested features this function can compute
     features_to_compute = list(set(requested_features) & set(base_features_names))
 
-    if battery_data.empty:
-        battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in features_to_compute])
-    else:
+    battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in features_to_compute])
+    if not battery_data.empty:
         battery_data = splitOvernightEpisodes(battery_data, ["battery_diff"], [])
 
         if day_segment != "daily":
            battery_data = splitMultiSegmentEpisodes(battery_data, day_segment, ["battery_diff"])
 
-        battery_data["battery_consumption_rate"] = battery_data["battery_diff"] / battery_data["time_diff"]
+        if not battery_data.empty:
+            battery_data["battery_consumption_rate"] = battery_data["battery_diff"] / battery_data["time_diff"]
 
-        # for battery_data_discharge:
-        battery_data_discharge = battery_data[battery_data["battery_diff"] > 0]
-        battery_discharge_features = pd.DataFrame()
-        if "countdischarge" in features_to_compute:
-            battery_discharge_features["battery_"+day_segment+"_countdischarge"] = battery_data_discharge.groupby(["local_start_date"])["local_start_date"].count()
-        if "sumdurationdischarge" in features_to_compute:
-            battery_discharge_features["battery_"+day_segment+"_sumdurationdischarge"] = battery_data_discharge.groupby(["local_start_date"])["time_diff"].sum()
-        if "avgconsumptionrate" in features_to_compute:
-            battery_discharge_features["battery_"+day_segment+"_avgconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].mean()
-        if "maxconsumptionrate" in features_to_compute:
-            battery_discharge_features["battery_"+day_segment+"_maxconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].max()
+            # for battery_data_discharge:
+            battery_data_discharge = battery_data[battery_data["battery_diff"] > 0]
+            battery_discharge_features = pd.DataFrame()
+            if "countdischarge" in features_to_compute:
+                battery_discharge_features["battery_"+day_segment+"_countdischarge"] = battery_data_discharge.groupby(["local_start_date"])["local_start_date"].count()
+            if "sumdurationdischarge" in features_to_compute:
+                battery_discharge_features["battery_"+day_segment+"_sumdurationdischarge"] = battery_data_discharge.groupby(["local_start_date"])["time_diff"].sum()
+            if "avgconsumptionrate" in features_to_compute:
+                battery_discharge_features["battery_"+day_segment+"_avgconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].mean()
+            if "maxconsumptionrate" in features_to_compute:
+                battery_discharge_features["battery_"+day_segment+"_maxconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].max()
 
-        # for battery_data_charge:
-        battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
-        battery_charge_features = pd.DataFrame()
-        if "countcharge" in features_to_compute:
-            battery_charge_features["battery_"+day_segment+"_countcharge"] = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
-        if "sumdurationcharge" in features_to_compute:
-            battery_charge_features["battery_"+day_segment+"_sumdurationcharge"] = battery_data_charge.groupby(["local_start_date"])["time_diff"].sum()
+            # for battery_data_charge:
+            battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
+            battery_charge_features = pd.DataFrame()
+            if "countcharge" in features_to_compute:
+                battery_charge_features["battery_"+day_segment+"_countcharge"] = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
+            if "sumdurationcharge" in features_to_compute:
+                battery_charge_features["battery_"+day_segment+"_sumdurationcharge"] = battery_data_charge.groupby(["local_start_date"])["time_diff"].sum()
 
-        # combine discharge features and charge features; fill the missing values with ZERO
-        battery_features = pd.concat([battery_discharge_features, battery_charge_features], axis=1, sort=True).fillna(0)
+            # combine discharge features and charge features; fill the missing values with ZERO
+            battery_features = pd.concat([battery_discharge_features, battery_charge_features], axis=1, sort=True).fillna(0)
 
-        battery_features.index.rename("local_date", inplace=True)
-        battery_features = battery_features.reset_index()
+            battery_features.index.rename("local_date", inplace=True)
+            battery_features = battery_features.reset_index()
 
     return battery_features

@@ -11,16 +11,12 @@ def base_conversation_features(conversation_data, day_segment, requested_feature
     # the subset of requested features this function can compute
     features_to_compute = list(set(requested_features) & set(base_features_names))
 
-
-    if conversation_data.empty:
-        conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
-    else:
+    conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
+    if not conversation_data.empty:
         if day_segment != "daily":
             conversation_data = conversation_data[conversation_data["local_day_segment"] == day_segment]
 
-        if conversation_data.empty:
-            conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
-        else:
+        if not conversation_data.empty:
             conversation_features = pd.DataFrame()
 
             conversation_data = conversation_data.drop_duplicates(subset = 'local_time', keep = 'first')

@@ -6,9 +6,8 @@ def base_light_features(light_data, day_segment, requested_features):
     # the subset of requested features this function can compute
     features_to_compute = list(set(requested_features) & set(base_features_names))
 
-    if light_data.empty:
-        light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute])
-    else:
+    light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute])
+    if not light_data.empty:
         if day_segment != "daily":
             light_data = light_data[light_data["local_day_segment"] == day_segment]
 