Fix a bug in the feature extraction code for some sensors: it broke when there was no data for an epoch

pull/95/head
Meng Li 2020-06-26 11:25:25 -04:00
parent e66ed6a79b
commit 17f41588d8
6 changed files with 50 additions and 60 deletions

View File

@ -70,14 +70,12 @@ def base_accelerometer_features(acc_data, day_segment, requested_features, valid
features_to_compute = features_to_compute_magnitude + features_to_compute_exertionalactivityepisode + features_to_compute_nonexertionalactivityepisode + (["validsensedminutes"] if valid_sensed_minutes else [])
if acc_data.empty:
acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
else:
acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
if not acc_data.empty:
if day_segment != "daily":
acc_data = acc_data[acc_data["local_day_segment"] == day_segment]
if acc_data.empty:
acc_features = pd.DataFrame(columns=["local_date"] + ["acc_" + day_segment + "_" + x for x in features_to_compute])
else:
if not acc_data.empty:
acc_features = pd.DataFrame()
# get magnitude related features: magnitude = sqrt(x^2+y^2+z^2)
magnitude = acc_data.apply(lambda row: np.sqrt(row["double_values_0"] ** 2 + row["double_values_1"] ** 2 + row["double_values_2"] ** 2), axis=1)

View File

@ -28,9 +28,8 @@ def base_applications_foreground_features(apps_data, day_segment, requested_feat
# deep copy the apps_data for the top1global computation
apps_data_global = apps_data.copy()
if apps_data.empty:
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + apps)]])
else:
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + apps)]])
if not apps_data.empty:
if day_segment != "daily":
apps_data =apps_data[apps_data["local_day_segment"] == day_segment]

View File

@ -9,10 +9,8 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
if ar_data.empty:
ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
else:
ar_features = pd.DataFrame()
ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
if not ar_data.empty:
ar_deltas = splitOvernightEpisodes(ar_deltas, [],["activity"])
if day_segment != "daily":
@ -22,25 +20,25 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
resampledData = ar_data.set_index(ar_data.local_date_time)
resampledData.drop(columns=["local_date_time"], inplace=True)
if(day_segment!="daily"):
if day_segment != "daily":
resampledData = resampledData.loc[resampledData["local_day_segment"] == day_segment]
if resampledData.empty:
ar_features = pd.DataFrame(columns = ["ar_" + day_segment + "_" + x for x in features_to_compute])
else:
#Finding the count of samples of the day
if not resampledData.empty:
ar_features = pd.DataFrame()
# finding the count of samples of the day
if "count" in features_to_compute:
ar_features["ar_" + day_segment + "_count"] = resampledData["activity_type"].resample("D").count()
#Finding most common activity of the day
# finding most common activity of the day
if "mostcommonactivity" in features_to_compute:
ar_features["ar_" + day_segment + "_mostcommonactivity"] = resampledData["activity_type"].resample("D").apply(lambda x: stats.mode(x)[0] if len(stats.mode(x)[0]) != 0 else None)
#finding different number of activities during a day
# finding different number of activities during a day
if "countuniqueactivities" in features_to_compute:
ar_features["ar_" + day_segment + "_countuniqueactivities"] = resampledData["activity_type"].resample("D").nunique()
#finding Number of times activity changed
# finding Number of times activity changed
if "activitychangecount" in features_to_compute:
resampledData["activity_type_shift"] = resampledData["activity_type"].shift().fillna(resampledData["activity_type"].head(1))
resampledData["different_activity"] = np.where(resampledData["activity_type"]!=resampledData["activity_type_shift"],1,0)
@ -55,7 +53,7 @@ def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
if column in features_to_compute:
ar_features["ar_" + day_segment + "_" + column] = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))].groupby(["local_start_date"])["time_diff"].sum()
ar_features.index.names = ["local_date"]
ar_features = ar_features.reset_index()
ar_features.index.names = ["local_date"]
ar_features = ar_features.reset_index()
return ar_features

View File

@ -9,40 +9,40 @@ def base_battery_features(battery_data, day_segment, requested_features):
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
if battery_data.empty:
battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in features_to_compute])
else:
battery_features = pd.DataFrame(columns=["local_date"] + ["battery_" + day_segment + "_" + x for x in features_to_compute])
if not battery_data.empty:
battery_data = splitOvernightEpisodes(battery_data, ["battery_diff"], [])
if day_segment != "daily":
battery_data = splitMultiSegmentEpisodes(battery_data, day_segment, ["battery_diff"])
if not battery_data.empty:
battery_data["battery_consumption_rate"] = battery_data["battery_diff"] / battery_data["time_diff"]
battery_data["battery_consumption_rate"] = battery_data["battery_diff"] / battery_data["time_diff"]
# for battery_data_discharge:
battery_data_discharge = battery_data[battery_data["battery_diff"] > 0]
battery_discharge_features = pd.DataFrame()
if "countdischarge" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_countdischarge"] = battery_data_discharge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationdischarge" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_sumdurationdischarge"] = battery_data_discharge.groupby(["local_start_date"])["time_diff"].sum()
if "avgconsumptionrate" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_avgconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].mean()
if "maxconsumptionrate" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_maxconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].max()
# for battery_data_discharge:
battery_data_discharge = battery_data[battery_data["battery_diff"] > 0]
battery_discharge_features = pd.DataFrame()
if "countdischarge" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_countdischarge"] = battery_data_discharge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationdischarge" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_sumdurationdischarge"] = battery_data_discharge.groupby(["local_start_date"])["time_diff"].sum()
if "avgconsumptionrate" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_avgconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].mean()
if "maxconsumptionrate" in features_to_compute:
battery_discharge_features["battery_"+day_segment+"_maxconsumptionrate"] = battery_data_discharge.groupby(["local_start_date"])["battery_consumption_rate"].max()
# for battery_data_charge:
battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
battery_charge_features = pd.DataFrame()
if "countcharge" in features_to_compute:
battery_charge_features["battery_"+day_segment+"_countcharge"] = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationcharge" in features_to_compute:
battery_charge_features["battery_"+day_segment+"_sumdurationcharge"] = battery_data_charge.groupby(["local_start_date"])["time_diff"].sum()
# for battery_data_charge:
battery_data_charge = battery_data[battery_data["battery_diff"] <= 0]
battery_charge_features = pd.DataFrame()
if "countcharge" in features_to_compute:
battery_charge_features["battery_"+day_segment+"_countcharge"] = battery_data_charge.groupby(["local_start_date"])["local_start_date"].count()
if "sumdurationcharge" in features_to_compute:
battery_charge_features["battery_"+day_segment+"_sumdurationcharge"] = battery_data_charge.groupby(["local_start_date"])["time_diff"].sum()
# combine discharge features and charge features; fill the missing values with ZERO
battery_features = pd.concat([battery_discharge_features, battery_charge_features], axis=1, sort=True).fillna(0)
# combine discharge features and charge features; fill the missing values with ZERO
battery_features = pd.concat([battery_discharge_features, battery_charge_features], axis=1, sort=True).fillna(0)
battery_features.index.rename("local_date", inplace=True)
battery_features = battery_features.reset_index()
battery_features.index.rename("local_date", inplace=True)
battery_features = battery_features.reset_index()
return battery_features

View File

@ -11,16 +11,12 @@ def base_conversation_features(conversation_data, day_segment, requested_feature
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
if conversation_data.empty:
conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
else:
conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
if not conversation_data.empty:
if day_segment != "daily":
conversation_data = conversation_data[conversation_data["local_day_segment"] == day_segment]
if conversation_data.empty:
conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
else:
if not conversation_data.empty:
conversation_features = pd.DataFrame()
conversation_data = conversation_data.drop_duplicates(subset = 'local_time', keep = 'first')

View File

@ -6,9 +6,8 @@ def base_light_features(light_data, day_segment, requested_features):
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names))
if light_data.empty:
light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute])
else:
light_features = pd.DataFrame(columns=["local_date"] + ["light_" + day_segment + "_" + x for x in features_to_compute])
if not light_data.empty:
if day_segment != "daily":
light_data =light_data[light_data["local_day_segment"] == day_segment]