Improved code for radius of gyration in location features.
parent
84f9d41b4f
commit
8cfe2dabb7
|
@ -320,23 +320,25 @@ def radius_of_gyration(locationData):
|
|||
return None
|
||||
# Center is the centroid, not the home location
|
||||
valid_clusters = locationData[locationData["location_label"] != -1]
|
||||
centroid_ce = (valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()).mean()
|
||||
centroid_clusters = valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()
|
||||
centroid_all_clusters = (valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()).mean()
|
||||
clusters_centroid = valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()
|
||||
|
||||
finalX = 0
|
||||
for labels in centroid_clusters.index:
|
||||
lon1, lat1, lon2, lat2 = centroid_clusters.loc[labels].double_longitude, centroid_clusters.loc[labels].double_latitude,centroid_ce.double_longitude, centroid_ce.double_latitude
|
||||
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
|
||||
rog = 0
|
||||
for labels in clusters_centroid.index:
|
||||
lat_lon_dict = dict()
|
||||
lat_lon_dict['_lon_before'] = clusters_centroid.loc[labels].double_longitude
|
||||
lat_lon_dict['_lat_before'] = clusters_centroid.loc[labels].double_latitude
|
||||
lat_lon_dict['_lon_after'] = centroid_all_clusters.double_longitude
|
||||
lat_lon_dict['_lat_after'] = centroid_all_clusters.double_latitude
|
||||
|
||||
dlon = lon2 - lon1
|
||||
dlat = lat2 - lat1
|
||||
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
|
||||
c = 2 * asin(sqrt(a))
|
||||
r = 6371 # Radius of earth in kilometers. Use 3956 for miles
|
||||
distance = (c * r * 1000) ** 2
|
||||
finalX = finalX + (locationData[locationData["location_label"]==labels].shape[0] * distance)
|
||||
distance = haversine(lat_lon_dict) ** 2
|
||||
|
||||
return np.sqrt(valid_clusters.shape[0]*finalX)
|
||||
time_in_cluster = locationData[locationData["location_label"]==labels].shape[0]
|
||||
rog = rog + (time_in_cluster * distance)
|
||||
time_all_clusters = valid_clusters.shape[0]
|
||||
final_rog = (1/time_all_clusters) * rog
|
||||
|
||||
return np.sqrt(final_rog)
|
||||
|
||||
def time_at_topn_clusters_in_group(locationData,n): # relevant only for global location features since, top3_clusters = top3_clusters_in_group for local
|
||||
|
||||
|
|
Loading…
Reference in New Issue