diff --git a/config.yaml b/config.yaml index 7e44df20..21b09f81 100644 --- a/config.yaml +++ b/config.yaml @@ -90,6 +90,7 @@ DORYAB_LOCATION: DBSCAN_EPS: 10 # meters DBSCAN_MINSAMPLES: 5 THRESHOLD_STATIC : 1 # km/h + MAXIMUM_GAP_ALLOWED: 300 # seconds BLUETOOTH: COMPUTE: False diff --git a/rules/features.snakefile b/rules/features.snakefile index e8de9167..34dc80a3 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -129,7 +129,8 @@ rule location_doryab_features: day_segment = "{day_segment}", dbscan_eps = config["DORYAB_LOCATION"]["DBSCAN_EPS"], dbscan_minsamples = config["DORYAB_LOCATION"]["DBSCAN_MINSAMPLES"], - threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"] + threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"], + maximum_gap_allowed = config["DORYAB_LOCATION"]["MAXIMUM_GAP_ALLOWED"] output: "data/processed/{pid}/location_doryab_{day_segment}.csv" script: diff --git a/src/features/location_doryab/location_base.py b/src/features/location_doryab/location_base.py index 1a8aee82..e40603ce 100644 --- a/src/features/location_doryab/location_base.py +++ b/src/features/location_doryab/location_base.py @@ -4,7 +4,7 @@ from astropy.timeseries import LombScargle from sklearn.cluster import DBSCAN from math import radians, cos, sin, asin, sqrt -def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static): +def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static, maximum_gap_allowed): # name of the features this function can compute base_features_names = 
["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"] @@ -34,7 +34,7 @@ def base_location_features(location_data, day_segment, requested_features, dbsca preComputedDistanceandSpeed = pd.DataFrame() for localDate in location_data['local_date'].unique(): - distance, speeddf = get_all_travel_distances_meters_speed(location_data[location_data['local_date']==localDate],threshold_static) + distance, speeddf = get_all_travel_distances_meters_speed(location_data[location_data['local_date']==localDate],threshold_static,maximum_gap_allowed) preComputedDistanceandSpeed.loc[localDate,"distance"] = distance.sum() preComputedDistanceandSpeed.loc[localDate,"avgspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].mean() preComputedDistanceandSpeed.loc[localDate,"varspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].var() @@ -133,7 +133,7 @@ def distance_to_degrees(d): d = d / 60 return d -def get_all_travel_distances_meters_speed(locationData,threshold): +def get_all_travel_distances_meters_speed(locationData,threshold,maximum_gap_allowed): lat_lon_temp = pd.DataFrame() @@ -146,7 +146,7 @@ def get_all_travel_distances_meters_speed(locationData,threshold): lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before'] lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds()) - lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300] + lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= maximum_gap_allowed] if lat_lon_temp.empty: return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []}) diff --git 
a/src/features/location_doryab_features.py b/src/features/location_doryab_features.py index 2a78e98e..a053a8d8 100644 --- a/src/features/location_doryab_features.py +++ b/src/features/location_doryab_features.py @@ -8,8 +8,9 @@ location_features = pd.DataFrame(columns=["local_date"]) dbscan_eps = snakemake.params["dbscan_eps"] dbscan_minsamples = snakemake.params["dbscan_minsamples"] threshold_static = snakemake.params["threshold_static"] +maximum_gap_allowed = snakemake.params["maximum_gap_allowed"] -location_features = location_features.merge(base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static), on="local_date", how="outer") +location_features = location_features.merge(base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static,maximum_gap_allowed), on="local_date", how="outer") assert len(requested_features) + 1 == location_features.shape[1], "The number of features in the output dataframe (=" + str(location_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your location feature extraction functions"