Added maximum_gap_allowed parameter
parent
c86efb19d6
commit
0ada1292ed
|
@ -90,6 +90,7 @@ DORYAB_LOCATION:
|
||||||
DBSCAN_EPS: 10 # meters
|
DBSCAN_EPS: 10 # meters
|
||||||
DBSCAN_MINSAMPLES: 5
|
DBSCAN_MINSAMPLES: 5
|
||||||
THRESHOLD_STATIC : 1 # km/h
|
THRESHOLD_STATIC : 1 # km/h
|
||||||
|
MAXIMUM_GAP_ALLOWED: 300 # seconds
|
||||||
|
|
||||||
BLUETOOTH:
|
BLUETOOTH:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
|
|
@ -129,7 +129,8 @@ rule location_doryab_features:
|
||||||
day_segment = "{day_segment}",
|
day_segment = "{day_segment}",
|
||||||
dbscan_eps = config["DORYAB_LOCATION"]["DBSCAN_EPS"],
|
dbscan_eps = config["DORYAB_LOCATION"]["DBSCAN_EPS"],
|
||||||
dbscan_minsamples = config["DORYAB_LOCATION"]["DBSCAN_MINSAMPLES"],
|
dbscan_minsamples = config["DORYAB_LOCATION"]["DBSCAN_MINSAMPLES"],
|
||||||
threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"]
|
threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"],
|
||||||
|
maximum_gap_allowed = config["DORYAB_LOCATION"]["MAXIMUM_GAP_ALLOWED"]
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/location_doryab_{day_segment}.csv"
|
"data/processed/{pid}/location_doryab_{day_segment}.csv"
|
||||||
script:
|
script:
|
||||||
|
|
|
@ -4,7 +4,7 @@ from astropy.timeseries import LombScargle
|
||||||
from sklearn.cluster import DBSCAN
|
from sklearn.cluster import DBSCAN
|
||||||
from math import radians, cos, sin, asin, sqrt
|
from math import radians, cos, sin, asin, sqrt
|
||||||
|
|
||||||
def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static):
|
def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static, maximum_gap_allowed):
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
base_features_names = ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
base_features_names = ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
|
||||||
|
|
||||||
preComputedDistanceandSpeed = pd.DataFrame()
|
preComputedDistanceandSpeed = pd.DataFrame()
|
||||||
for localDate in location_data['local_date'].unique():
|
for localDate in location_data['local_date'].unique():
|
||||||
distance, speeddf = get_all_travel_distances_meters_speed(location_data[location_data['local_date']==localDate],threshold_static)
|
distance, speeddf = get_all_travel_distances_meters_speed(location_data[location_data['local_date']==localDate],threshold_static,maximum_gap_allowed)
|
||||||
preComputedDistanceandSpeed.loc[localDate,"distance"] = distance.sum()
|
preComputedDistanceandSpeed.loc[localDate,"distance"] = distance.sum()
|
||||||
preComputedDistanceandSpeed.loc[localDate,"avgspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].mean()
|
preComputedDistanceandSpeed.loc[localDate,"avgspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].mean()
|
||||||
preComputedDistanceandSpeed.loc[localDate,"varspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].var()
|
preComputedDistanceandSpeed.loc[localDate,"varspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].var()
|
||||||
|
@ -133,7 +133,7 @@ def distance_to_degrees(d):
|
||||||
d = d / 60
|
d = d / 60
|
||||||
return d
|
return d
|
||||||
|
|
||||||
def get_all_travel_distances_meters_speed(locationData,threshold):
|
def get_all_travel_distances_meters_speed(locationData,threshold,maximum_gap_allowed):
|
||||||
|
|
||||||
lat_lon_temp = pd.DataFrame()
|
lat_lon_temp = pd.DataFrame()
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ def get_all_travel_distances_meters_speed(locationData,threshold):
|
||||||
lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before']
|
lat_lon_temp['time_diff'] = lat_lon_temp['time_after'] - lat_lon_temp['time_before']
|
||||||
lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds())
|
lat_lon_temp['timeInSeconds'] = lat_lon_temp['time_diff'].apply(lambda x: x.total_seconds())
|
||||||
|
|
||||||
lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= 300]
|
lat_lon_temp = lat_lon_temp[lat_lon_temp['timeInSeconds'] <= maximum_gap_allowed]
|
||||||
|
|
||||||
if lat_lon_temp.empty:
|
if lat_lon_temp.empty:
|
||||||
return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []})
|
return pd.Series(), pd.DataFrame({"speed": [], "speedTag": []})
|
||||||
|
|
|
@ -8,8 +8,9 @@ location_features = pd.DataFrame(columns=["local_date"])
|
||||||
dbscan_eps = snakemake.params["dbscan_eps"]
|
dbscan_eps = snakemake.params["dbscan_eps"]
|
||||||
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
||||||
threshold_static = snakemake.params["threshold_static"]
|
threshold_static = snakemake.params["threshold_static"]
|
||||||
|
maximum_gap_allowed = snakemake.params["maximum_gap_allowed"]
|
||||||
|
|
||||||
location_features = location_features.merge(base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static), on="local_date", how="outer")
|
location_features = location_features.merge(base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static,maximum_gap_allowed), on="local_date", how="outer")
|
||||||
|
|
||||||
assert len(requested_features) + 1 == location_features.shape[1], "The number of features in the output dataframe (=" + str(location_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your location feature extraction functions"
|
assert len(requested_features) + 1 == location_features.shape[1], "The number of features in the output dataframe (=" + str(location_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your location feature extraction functions"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue