From cfc50399181a11a9601def8dc212aab9f614f509 Mon Sep 17 00:00:00 2001 From: nikunjgoel95 Date: Thu, 18 Mar 2021 18:44:11 -0400 Subject: [PATCH] Fixed the empty dataframe case in infer_home_location.py and added array condition in doryab location --- src/data/infer_home_location.py | 37 +++++++++++---------- src/features/phone_locations/doryab/main.py | 6 ++-- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/data/infer_home_location.py b/src/data/infer_home_location.py index db4ef1b9..4a5fa342 100644 --- a/src/data/infer_home_location.py +++ b/src/data/infer_home_location.py @@ -111,27 +111,30 @@ def haversine(lon1,lat1,lon2,lat2): origDf = pd.read_csv(snakemake.input[0]) filteredDf = filterDatafromDf(origDf) -dbscan_eps = snakemake.params["dbscan_eps"] -dbscan_minsamples = snakemake.params["dbscan_minsamples"] -threshold_static = snakemake.params["threshold_static"] -clustering_algorithm = snakemake.params["clustering_algorithm"] - -if clustering_algorithm == "DBSCAN": - hyperparameters = {'eps' : distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples} -elif clustering_algorithm == "OPTICS": - hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric':'euclidean', 'cluster_method' : 'dbscan'} +if filteredDf.empty: + filteredDf.to_csv(snakemake.output[0]) else: - raise ValueError("config[PHONE_LOCATIONS][HOME_INFERENCE][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided ",clustering_algorithm) + dbscan_eps = snakemake.params["dbscan_eps"] + dbscan_minsamples = snakemake.params["dbscan_minsamples"] + threshold_static = snakemake.params["threshold_static"] + clustering_algorithm = snakemake.params["clustering_algorithm"] -filteredDf = cluster_and_label(filteredDf,clustering_algorithm,threshold_static,**hyperparameters) + if clustering_algorithm == "DBSCAN": + hyperparameters = {'eps' : distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples} + elif clustering_algorithm == 
"OPTICS": + hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric':'euclidean', 'cluster_method' : 'dbscan'} + else: + raise ValueError("config[PHONE_LOCATIONS][HOME_INFERENCE][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided ",clustering_algorithm) -origDf['home_latitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_latitude'] -origDf['home_longitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_longitude'] + filteredDf = cluster_and_label(filteredDf,clustering_algorithm,threshold_static,**hyperparameters) -distanceFromHome = haversine(origDf.double_longitude,origDf.double_latitude,origDf.home_longitude,origDf.home_latitude) + origDf['home_latitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_latitude'] + origDf['home_longitude'] = filteredDf[filteredDf['location_label']==1][['double_latitude','double_longitude']].mean()['double_longitude'] -finalDf = origDf.drop(['home_latitude','home_longitude'], axis=1) -finalDf.insert(len(finalDf.columns)-1,'distancefromhome',distanceFromHome) -finalDf.to_csv(snakemake.output[0], index=False) + distanceFromHome = haversine(origDf.double_longitude,origDf.double_latitude,origDf.home_longitude,origDf.home_latitude) + + finalDf = origDf.drop(['home_latitude','home_longitude'], axis=1) + finalDf.insert(len(finalDf.columns)-1,'distancefromhome',distanceFromHome) + finalDf.to_csv(snakemake.output[0], index=False) diff --git a/src/features/phone_locations/doryab/main.py b/src/features/phone_locations/doryab/main.py index 5067274a..1913377a 100644 --- a/src/features/phone_locations/doryab/main.py +++ b/src/features/phone_locations/doryab/main.py @@ -188,12 +188,14 @@ def len_stay_timeattopn(locationData,maximum_gap_allowed,maximum_row_duration): calculationDf.loc[calculationDf.timeInSeconds >= 
maximum_gap_allowed,'timeInSeconds'] = maximum_row_duration timeArray = calculationDf.groupby('location_label')['timeInSeconds'].sum().reset_index()['timeInSeconds'].sort_values(ascending=False)/60 - if len(timeArray) > 2: + if len(timeArray) == 3: return (timeArray[0],timeArray[1],timeArray[2],timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean()) elif len(timeArray)==2: return (timeArray[0],timeArray[1],None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean()) - else: + elif len(timeArray)==1: return (timeArray[0],None,None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean()) + else: + return (None,None,None,timeArray.max(),timeArray.min(),timeArray.std(),timeArray.mean()) def getMinutesData(locationData):