Improved code for radius of gyration in location features.

pull/95/head
nikunjgoel95 2020-08-05 12:06:39 -04:00
parent 84f9d41b4f
commit 8cfe2dabb7
1 changed files with 17 additions and 15 deletions

View File

@@ -320,23 +320,25 @@ def radius_of_gyration(locationData):
return None return None
# Center is the centroid, not the home location # Center is the centroid, not the home location
valid_clusters = locationData[locationData["location_label"] != -1] valid_clusters = locationData[locationData["location_label"] != -1]
centroid_ce = (valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()).mean() centroid_all_clusters = (valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()).mean()
centroid_clusters = valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean() clusters_centroid = valid_clusters.groupby('location_label')[['double_latitude','double_longitude']].mean()
finalX = 0 rog = 0
for labels in centroid_clusters.index: for labels in clusters_centroid.index:
lon1, lat1, lon2, lat2 = centroid_clusters.loc[labels].double_longitude, centroid_clusters.loc[labels].double_latitude,centroid_ce.double_longitude, centroid_ce.double_latitude lat_lon_dict = dict()
lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) lat_lon_dict['_lon_before'] = clusters_centroid.loc[labels].double_longitude
lat_lon_dict['_lat_before'] = clusters_centroid.loc[labels].double_latitude
lat_lon_dict['_lon_after'] = centroid_all_clusters.double_longitude
lat_lon_dict['_lat_after'] = centroid_all_clusters.double_latitude
dlon = lon2 - lon1 distance = haversine(lat_lon_dict) ** 2
dlat = lat2 - lat1
a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
c = 2 * asin(sqrt(a))
r = 6371 # Radius of earth in kilometers. Use 3956 for miles
distance = (c * r * 1000) ** 2
finalX = finalX + (locationData[locationData["location_label"]==labels].shape[0] * distance)
return np.sqrt(valid_clusters.shape[0]*finalX) time_in_cluster = locationData[locationData["location_label"]==labels].shape[0]
rog = rog + (time_in_cluster * distance)
time_all_clusters = valid_clusters.shape[0]
final_rog = (1/time_all_clusters) * rog
return np.sqrt(final_rog)
def time_at_topn_clusters_in_group(locationData,n): # relevant only for global location features since, top3_clusters = top3_clusters_in_group for local def time_at_topn_clusters_in_group(locationData,n): # relevant only for global location features since, top3_clusters = top3_clusters_in_group for local