Fix location_doryab bug: same location all day

pull/95/head
Meng Li 2020-07-21 15:47:48 -04:00
parent edd46f5d17
commit c86efb19d6
1 changed files with 6 additions and 14 deletions

View File

@ -25,14 +25,11 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)] location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)]
# exclude dates when "double_latitude" and "double_longitude" values are constant
location_data = dropDatesSameLocationAllDay(location_data)
if "locationvariance" in features_to_compute: if "locationvariance" in features_to_compute:
location_features["location_" + day_segment + "_locationvariance"] = location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var() location_features["location_" + day_segment + "_locationvariance"] = location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()
if "loglocationvariance" in features_to_compute: if "loglocationvariance" in features_to_compute:
location_features["location_" + day_segment + "_loglocationvariance"] = np.log10(location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()) location_features["location_" + day_segment + "_loglocationvariance"] = (location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()).apply(lambda x: np.log10(x) if x > 0 else None)
preComputedDistanceandSpeed = pd.DataFrame() preComputedDistanceandSpeed = pd.DataFrame()
@ -128,13 +125,6 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
return location_features return location_features
def dropDatesSameLocationAllDay(data):
    """Remove every row belonging to a date whose location never changes.

    A ``local_date`` is dropped when the per-date variance of both
    ``double_latitude`` and ``double_longitude`` is exactly 0, or when either
    variance is undefined (NaN), which is what pandas reports for a date
    with fewer than two usable samples.

    Args:
        data: DataFrame with at least the columns ``local_date``,
            ``double_latitude`` and ``double_longitude``.

    Returns:
        A new DataFrame holding the remaining rows, with ``local_date`` moved
        to the first column and a fresh 0..n-1 index. ``data`` itself is left
        untouched.
    """
    # Select the two columns with a list: indexing a groupby with a bare
    # tuple of names is deprecated in pandas 1.x and an error in pandas 2.x.
    daily_variance = data.groupby("local_date")[["double_latitude", "double_longitude"]].var()
    lat_var = daily_variance["double_latitude"]
    lon_var = daily_variance["double_longitude"]

    is_static = (lat_var == 0) & (lon_var == 0)
    is_undefined = lat_var.isnull() | lon_var.isnull()
    drop_dates = daily_variance.index[is_static | is_undefined]

    # Filter into a new frame instead of set_index/drop with inplace=True, so
    # the caller's DataFrame (often a filtered slice) is never mutated.
    kept = data[~data["local_date"].isin(drop_dates)]

    # set_index + reset_index reproduces the historical output layout:
    # local_date as the leading column and the previous index discarded.
    return kept.set_index("local_date").reset_index()
def distance_to_degrees(d): def distance_to_degrees(d):
#Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error #Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error
@ -156,9 +146,11 @@ def get_all_travel_distances_meters_speed(locationData,threshold):
lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before'] lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before']
lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds()) lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds())
lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300] lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300]
# if lat_lon_temp.empty:
# return pd.Series([0]), pd.DataFrame({"speed": [0], "speedTag": ["Static"]}) if lat_lon_temp.empty:
return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []})
lat_lon_temp['distances'] = lat_lon_temp.apply(haversine, axis=1) # meters lat_lon_temp['distances'] = lat_lon_temp.apply(haversine, axis=1) # meters
lat_lon_temp['speed'] = (lat_lon_temp['distances'] / lat_lon_temp['timeInSeconds'] ) lat_lon_temp['speed'] = (lat_lon_temp['distances'] / lat_lon_temp['timeInSeconds'] )
lat_lon_temp['speed'] = lat_lon_temp['speed'].replace(np.inf, np.nan) * 3.6 lat_lon_temp['speed'] = lat_lon_temp['speed'].replace(np.inf, np.nan) * 3.6