Merge branch 'feature/doryab_location_empty_df_fix' into develop
Adding the branch that fixes home location inference and the handling of empty dataframes.
pull/130/head
commit
7815c380a2
|
@ -111,27 +111,30 @@ def haversine(lon1,lat1,lon2,lat2):
|
||||||
|
|
||||||
origDf = pd.read_csv(snakemake.input[0])
|
origDf = pd.read_csv(snakemake.input[0])
|
||||||
filteredDf = filterDatafromDf(origDf)
|
filteredDf = filterDatafromDf(origDf)
|
||||||
dbscan_eps = snakemake.params["dbscan_eps"]
|
if filteredDf.empty:
|
||||||
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
filteredDf.to_csv(snakemake.output[0])
|
||||||
threshold_static = snakemake.params["threshold_static"]
|
|
||||||
clustering_algorithm = snakemake.params["clustering_algorithm"]
|
|
||||||
|
|
||||||
if clustering_algorithm == "DBSCAN":
|
|
||||||
hyperparameters = {'eps' : distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples}
|
|
||||||
elif clustering_algorithm == "OPTICS":
|
|
||||||
hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric':'euclidean', 'cluster_method' : 'dbscan'}
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("config[PHONE_LOCATIONS][HOME_INFERENCE][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided ",clustering_algorithm)
|
dbscan_eps = snakemake.params["dbscan_eps"]
|
||||||
|
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
||||||
|
threshold_static = snakemake.params["threshold_static"]
|
||||||
|
clustering_algorithm = snakemake.params["clustering_algorithm"]
|
||||||
|
|
||||||
filteredDf = cluster_and_label(filteredDf,clustering_algorithm,threshold_static,**hyperparameters)
|
if clustering_algorithm == "DBSCAN":
|
||||||
|
hyperparameters = {'eps' : distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples}
|
||||||
|
elif clustering_algorithm == "OPTICS":
|
||||||
|
hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric':'euclidean', 'cluster_method' : 'dbscan'}
|
||||||
|
else:
|
||||||
|
raise ValueError("config[PHONE_LOCATIONS][HOME_INFERENCE][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided ",clustering_algorithm)
|
||||||
|
|
||||||
origDf['home_latitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_latitude']
|
filteredDf = cluster_and_label(filteredDf,clustering_algorithm,threshold_static,**hyperparameters)
|
||||||
origDf['home_longitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_longitude']
|
|
||||||
|
|
||||||
distanceFromHome = haversine(origDf.double_longitude,origDf.double_latitude,origDf.home_longitude,origDf.home_latitude)
|
origDf['home_latitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_latitude']
|
||||||
|
origDf['home_longitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_longitude']
|
||||||
|
|
||||||
finalDf = origDf.drop(['home_latitude','home_longitude'], axis=1)
|
distanceFromHome = haversine(origDf.double_longitude,origDf.double_latitude,origDf.home_longitude,origDf.home_latitude)
|
||||||
finalDf.insert(len(finalDf.columns)-1,'distancefromhome',distanceFromHome)
|
|
||||||
finalDf.to_csv(snakemake.output[0], index=False)
|
finalDf = origDf.drop(['home_latitude','home_longitude'], axis=1)
|
||||||
|
finalDf.insert(len(finalDf.columns)-1,'distancefromhome',distanceFromHome)
|
||||||
|
finalDf.to_csv(snakemake.output[0], index=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -188,12 +188,14 @@ def len_stay_timeattopn(locationData,maximum_gap_allowed,maximum_row_duration):
|
||||||
calculationDf.loc[calculationDf.timeInSeconds >= maximum_gap_allowed,'timeInSeconds'] = maximum_row_duration
|
calculationDf.loc[calculationDf.timeInSeconds >= maximum_gap_allowed,'timeInSeconds'] = maximum_row_duration
|
||||||
timeArray = calculationDf.groupby('location_label')['timeInSeconds'].sum().reset_index()['timeInSeconds'].sort_values(ascending=False)/60
|
timeArray = calculationDf.groupby('location_label')['timeInSeconds'].sum().reset_index()['timeInSeconds'].sort_values(ascending=False)/60
|
||||||
|
|
||||||
if len(timeArray) > 2:
|
if len(timeArray) == 3:
|
||||||
return (timeArray[0],timeArray[1],timeArray[2],timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
return (timeArray[0],timeArray[1],timeArray[2],timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
||||||
elif len(timeArray)==2:
|
elif len(timeArray)==2:
|
||||||
return (timeArray[0],timeArray[1],None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
return (timeArray[0],timeArray[1],None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
||||||
else:
|
elif len(timeArray)==1:
|
||||||
return (timeArray[0],None,None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
return (timeArray[0],None,None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
||||||
|
else:
|
||||||
|
return (None,None,None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean())
|
||||||
|
|
||||||
|
|
||||||
def getMinutesData(locationData):
|
def getMinutesData(locationData):
|
||||||
|
|
Loading…
Reference in New Issue