Fix location_doryab bug: same location all day
parent
edd46f5d17
commit
c86efb19d6
|
@ -25,14 +25,11 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
|
|||
|
||||
location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)]
|
||||
|
||||
# exclude dates when "double_latitude" and "double_longitude" values are constant
|
||||
location_data = dropDatesSameLocationAllDay(location_data)
|
||||
|
||||
if "locationvariance" in features_to_compute:
|
||||
location_features["location_" + day_segment + "_locationvariance"] = location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()
|
||||
|
||||
if "loglocationvariance" in features_to_compute:
|
||||
location_features["location_" + day_segment + "_loglocationvariance"] = np.log10(location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var())
|
||||
location_features["location_" + day_segment + "_loglocationvariance"] = (location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()).apply(lambda x: np.log10(x) if x > 0 else None)
|
||||
|
||||
|
||||
preComputedDistanceandSpeed = pd.DataFrame()
|
||||
|
@ -128,13 +125,6 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
|
|||
|
||||
return location_features
|
||||
|
||||
def dropDatesSameLocationAllDay(data):
|
||||
data_grouped = data.groupby(["local_date"])["double_latitude", "double_longitude"].var()
|
||||
drop_dates = data_grouped[((data_grouped["double_latitude"] == 0) & (data_grouped["double_longitude"] == 0)) | (data_grouped["double_latitude"].isnull()) | (data_grouped["double_longitude"].isnull())].index
|
||||
data.set_index(["local_date"], inplace = True)
|
||||
if not drop_dates.empty:
|
||||
data.drop(drop_dates, axis = 0, inplace = True)
|
||||
return data.reset_index()
|
||||
|
||||
def distance_to_degrees(d):
|
||||
#Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error
|
||||
|
@ -156,9 +146,11 @@ def get_all_travel_distances_meters_speed(locationData,threshold):
|
|||
lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before']
|
||||
lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds())
|
||||
|
||||
lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300]
|
||||
# if lat_lon_temp.empty:
|
||||
# return pd.Series([0]), pd.DataFrame({"speed": [0], "speedTag": ["Static"]})
|
||||
lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300]
|
||||
|
||||
if lat_lon_temp.empty:
|
||||
return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []})
|
||||
|
||||
lat_lon_temp['distances'] = lat_lon_temp.apply(haversine, axis=1) # meters
|
||||
lat_lon_temp['speed'] = (lat_lon_temp['distances'] / lat_lon_temp['timeInSeconds'] )
|
||||
lat_lon_temp['speed'] = lat_lon_temp['speed'].replace(np.inf, np.nan) * 3.6
|
||||
|
|
Loading…
Reference in New Issue