diff --git a/src/features/location_doryab/location_base.py b/src/features/location_doryab/location_base.py index be4d3ed3..1a8aee82 100644 --- a/src/features/location_doryab/location_base.py +++ b/src/features/location_doryab/location_base.py @@ -25,14 +25,11 @@ def base_location_features(location_data, day_segment, requested_features, dbsca location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)] - # exclude dates when "double_latitude" and "double_longitude" values are constant - location_data = dropDatesSameLocationAllDay(location_data) - if "locationvariance" in features_to_compute: location_features["location_" + day_segment + "_locationvariance"] = location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var() if "loglocationvariance" in features_to_compute: - location_features["location_" + day_segment + "_loglocationvariance"] = np.log10(location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()) + location_features["location_" + day_segment + "_loglocationvariance"] = (location_data.groupby(['local_date'])['double_latitude'].var() + location_data.groupby(['local_date'])['double_longitude'].var()).apply(lambda x: np.log10(x) if x > 0 else None) preComputedDistanceandSpeed = pd.DataFrame() @@ -128,13 +125,6 @@ def base_location_features(location_data, day_segment, requested_features, dbsca return location_features -def dropDatesSameLocationAllDay(data): - data_grouped = data.groupby(["local_date"])["double_latitude", "double_longitude"].var() - drop_dates = data_grouped[((data_grouped["double_latitude"] == 0) & (data_grouped["double_longitude"] == 0)) | (data_grouped["double_latitude"].isnull()) | (data_grouped["double_longitude"].isnull())].index - data.set_index(["local_date"], inplace = True) - if not drop_dates.empty: - data.drop(drop_dates, axis = 0, inplace = True) - return data.reset_index() def distance_to_degrees(d): #Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error @@ -156,9 +146,11 @@ def get_all_travel_distances_meters_speed(locationData,threshold): lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before'] lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds()) - lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300] - # if lat_lon_temp.empty: - # return pd.Series([0]), pd.DataFrame({"speed": [0], "speedTag": ["Static"]}) + lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300] + + if lat_lon_temp.empty: + return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []}) + lat_lon_temp['distances'] = lat_lon_temp.apply(haversine, axis=1) # meters lat_lon_temp['speed'] = (lat_lon_temp['distances'] / lat_lon_temp['timeInSeconds'] ) lat_lon_temp['speed'] = lat_lon_temp['speed'].replace(np.inf, np.nan) * 3.6