Fill in NaN values in the Doryab bluetooth and location features.
parent
3cf7ca41aa
commit
18002f59e1
|
@ -15,8 +15,8 @@ def deviceFeatures(devices, ownership, common_devices, features_to_compute, feat
|
||||||
if "meanscans" in features_to_compute:
|
if "meanscans" in features_to_compute:
|
||||||
features = features.join(device_value_counts.groupby("local_segment")["scans"].mean().to_frame("meanscans" + ownership), how="outer")
|
features = features.join(device_value_counts.groupby("local_segment")["scans"].mean().to_frame("meanscans" + ownership), how="outer")
|
||||||
if "stdscans" in features_to_compute:
|
if "stdscans" in features_to_compute:
|
||||||
# TODO: std scans
|
# TODO: check if std scans implementation works
|
||||||
features = features.join(device_value_counts.groupby("local_segment")["scans"].std().to_frame("stdscans" + ownership), how="outer")
|
features = features.join(device_value_counts.groupby("local_segment")["scans"].std().to_frame("stdscans" + ownership).fillna(0), how="outer")
|
||||||
# Most frequent device within segments, across segments, and across dataset
|
# Most frequent device within segments, across segments, and across dataset
|
||||||
if "countscansmostfrequentdevicewithinsegments" in features_to_compute:
|
if "countscansmostfrequentdevicewithinsegments" in features_to_compute:
|
||||||
features = features.join(device_value_counts.groupby("local_segment")["scans"].max().to_frame("countscansmostfrequentdevicewithinsegments" + ownership), how="outer")
|
features = features.join(device_value_counts.groupby("local_segment")["scans"].max().to_frame("countscansmostfrequentdevicewithinsegments" + ownership), how="outer")
|
||||||
|
|
|
@ -36,9 +36,10 @@ def variance_and_logvariance_features(location_data, location_features):
|
||||||
location_data["latitude_for_wvar"] = (location_data["double_latitude"] - location_data["latitude_wavg"]) ** 2 * location_data["duration"] * 60
|
location_data["latitude_for_wvar"] = (location_data["double_latitude"] - location_data["latitude_wavg"]) ** 2 * location_data["duration"] * 60
|
||||||
location_data["longitude_for_wvar"] = (location_data["double_longitude"] - location_data["longitude_wavg"]) ** 2 * location_data["duration"] * 60
|
location_data["longitude_for_wvar"] = (location_data["double_longitude"] - location_data["longitude_wavg"]) ** 2 * location_data["duration"] * 60
|
||||||
|
|
||||||
# TODO: location variance
|
|
||||||
location_features["locationvariance"] = ((location_data_grouped["latitude_for_wvar"].sum() + location_data_grouped["longitude_for_wvar"].sum()) / (location_data_grouped["duration"].sum() * 60 - 1)).fillna(0)
|
location_features["locationvariance"] = ((location_data_grouped["latitude_for_wvar"].sum() + location_data_grouped["longitude_for_wvar"].sum()) / (location_data_grouped["duration"].sum() * 60 - 1)).fillna(0)
|
||||||
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, np.nan)
|
|
||||||
|
# TODO: loglocationvariance - check if the implementation works
|
||||||
|
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, -1000000)
|
||||||
|
|
||||||
return location_features
|
return location_features
|
||||||
|
|
||||||
|
@ -113,8 +114,6 @@ def location_entropy(location_data):
|
||||||
entropy = -1 * location_data.groupby(["local_segment"])[["plogp"]].sum().rename(columns={"plogp": "locationentropy"})
|
entropy = -1 * location_data.groupby(["local_segment"])[["plogp"]].sum().rename(columns={"plogp": "locationentropy"})
|
||||||
|
|
||||||
entropy["num_clusters"] = location_data.groupby(["local_segment"])["cluster_label"].nunique()
|
entropy["num_clusters"] = location_data.groupby(["local_segment"])["cluster_label"].nunique()
|
||||||
|
|
||||||
# TODO: normalizedlocationentropy
|
|
||||||
entropy["normalizedlocationentropy"] = entropy["locationentropy"] / entropy["num_clusters"]
|
entropy["normalizedlocationentropy"] = entropy["locationentropy"] / entropy["num_clusters"]
|
||||||
|
|
||||||
return entropy
|
return entropy
|
||||||
|
@ -156,7 +155,6 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
|
||||||
# distance and speed features
|
# distance and speed features
|
||||||
moving_data = location_data[location_data["is_stationary"] == 0].copy()
|
moving_data = location_data[location_data["is_stationary"] == 0].copy()
|
||||||
location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True)
|
location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True)
|
||||||
# TODO: why is varspeed not being filled with 0?
|
|
||||||
location_features[["totaldistance", "avgspeed", "varspeed"]] = location_features[["totaldistance", "avgspeed", "varspeed"]].fillna(0)
|
location_features[["totaldistance", "avgspeed", "varspeed"]] = location_features[["totaldistance", "avgspeed", "varspeed"]].fillna(0)
|
||||||
|
|
||||||
# stationary features
|
# stationary features
|
||||||
|
|
|
@ -13,9 +13,18 @@ df_bt = df[doryab_cols_bt]
|
||||||
print(len(doryab_cols_bt))
|
print(len(doryab_cols_bt))
|
||||||
print(df_bt)
|
print(df_bt)
|
||||||
|
|
||||||
sns.heatmap(df_bt, xticklabels=1)
|
df_bt = df_bt.dropna(axis=0, how="all")
|
||||||
|
sns.heatmap(df_bt.isna(), xticklabels=1)
|
||||||
plt.savefig(f'bluetooth_doryab_values', bbox_inches='tight')
|
plt.savefig(f'bluetooth_doryab_values', bbox_inches='tight')
|
||||||
|
|
||||||
|
df_q = pd.DataFrame()
|
||||||
|
for col in df_bt:
|
||||||
|
df_q[col] = pd.to_numeric(pd.cut(df_bt[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
|
||||||
|
|
||||||
|
sns.heatmap(df_q, cbar=False, xticklabels=1)
|
||||||
|
plt.savefig(f'cut_bluetooth_doryab_values', bbox_inches='tight')
|
||||||
|
plt.close()
|
||||||
|
|
||||||
# Location
|
# Location
|
||||||
doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
|
doryab_cols_loc = [col for col in df.columns if "locations_doryab" in col]
|
||||||
df_loc = df[doryab_cols_loc]
|
df_loc = df[doryab_cols_loc]
|
||||||
|
@ -23,6 +32,20 @@ df_loc = df[doryab_cols_loc]
|
||||||
print(len(doryab_cols_loc))
|
print(len(doryab_cols_loc))
|
||||||
print(df_loc)
|
print(df_loc)
|
||||||
|
|
||||||
sns.heatmap(df_loc, xticklabels=1)
|
df_loc = df_loc.dropna(axis=0, how="all").reset_index(drop=True)
|
||||||
|
print(df_loc)
|
||||||
|
sns.heatmap(df_loc.isna())
|
||||||
plt.savefig(f'locations_doryab_values', bbox_inches='tight')
|
plt.savefig(f'locations_doryab_values', bbox_inches='tight')
|
||||||
|
|
||||||
|
df_q = pd.DataFrame()
|
||||||
|
for col in df_loc:
|
||||||
|
df_q[col] = pd.to_numeric(pd.cut(df_loc[col], bins=[-1,0,0.000000000001,1000], labels=[-1,0,1], right=False))
|
||||||
|
|
||||||
|
sns.heatmap(df_q, cbar=False, xticklabels=1)
|
||||||
|
plt.savefig(f'cut_location_doryab_values', bbox_inches='tight')
|
||||||
|
plt.close()
|
||||||
|
|
||||||
|
plt.plot(df_loc['phone_locations_doryab_loglocationvariance'])
|
||||||
|
plt.savefig(f'phone_locations_doryab_loglocationvariance', bbox_inches='tight')
|
||||||
|
plt.close()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue