Added a location feature that counts the minutes of data available in a day/epoch.
parent
f42753d28a
commit
faad8f5a8f
|
@ -91,6 +91,7 @@ DORYAB_LOCATION:
|
||||||
DBSCAN_MINSAMPLES: 5
|
DBSCAN_MINSAMPLES: 5
|
||||||
THRESHOLD_STATIC : 1 # km/h
|
THRESHOLD_STATIC : 1 # km/h
|
||||||
MAXIMUM_GAP_ALLOWED: 300
|
MAXIMUM_GAP_ALLOWED: 300
|
||||||
|
MINUTES_DATA_USED: True
|
||||||
|
|
||||||
BLUETOOTH:
|
BLUETOOTH:
|
||||||
COMPUTE: False
|
COMPUTE: False
|
||||||
|
|
|
@ -130,7 +130,8 @@ rule location_doryab_features:
|
||||||
dbscan_eps = config["DORYAB_LOCATION"]["DBSCAN_EPS"],
|
dbscan_eps = config["DORYAB_LOCATION"]["DBSCAN_EPS"],
|
||||||
dbscan_minsamples = config["DORYAB_LOCATION"]["DBSCAN_MINSAMPLES"],
|
dbscan_minsamples = config["DORYAB_LOCATION"]["DBSCAN_MINSAMPLES"],
|
||||||
threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"],
|
threshold_static = config["DORYAB_LOCATION"]["THRESHOLD_STATIC"],
|
||||||
maximum_gap_allowed = config["DORYAB_LOCATION"]["MAXIMUM_GAP_ALLOWED"]
|
maximum_gap_allowed = config["DORYAB_LOCATION"]["MAXIMUM_GAP_ALLOWED"],
|
||||||
|
minutes_data_used = config["DORYAB_LOCATION"]["MINUTES_DATA_USED"]
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/location_doryab_{day_segment}.csv"
|
"data/processed/{pid}/location_doryab_{day_segment}.csv"
|
||||||
script:
|
script:
|
||||||
|
|
|
@ -6,8 +6,7 @@ from math import radians, cos, sin, asin, sqrt
|
||||||
|
|
||||||
def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static, maximum_gap_allowed):
|
def base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples, threshold_static, maximum_gap_allowed):
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
base_features_names = ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy"]
|
base_features_names = ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","minutesdataused"]
|
||||||
|
|
||||||
# the subset of requested features this function can compute
|
# the subset of requested features this function can compute
|
||||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||||
|
|
||||||
|
@ -23,6 +22,12 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
|
||||||
else:
|
else:
|
||||||
location_features = pd.DataFrame()
|
location_features = pd.DataFrame()
|
||||||
|
|
||||||
|
if "minutesdataused" in features_to_compute:
|
||||||
|
for localDate in location_data["local_date"].unique():
|
||||||
|
location_features.loc[localDate,"location_" + day_segment + "_minutesdataused"] = getMinutesData(location_data[location_data["local_date"]==localDate])
|
||||||
|
|
||||||
|
location_features.index.name = 'local_date'
|
||||||
|
|
||||||
location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)]
|
location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)]
|
||||||
|
|
||||||
if "locationvariance" in features_to_compute:
|
if "locationvariance" in features_to_compute:
|
||||||
|
@ -126,6 +131,10 @@ def base_location_features(location_data, day_segment, requested_features, dbsca
|
||||||
return location_features
|
return location_features
|
||||||
|
|
||||||
|
|
||||||
|
def getMinutesData(locationData):
|
||||||
|
|
||||||
|
return locationData[['local_hour','local_minute']].drop_duplicates(inplace = False).shape[0]
|
||||||
|
|
||||||
def distance_to_degrees(d):
|
def distance_to_degrees(d):
|
||||||
#Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error
|
#Just an approximation, but speeds up clustering by a huge amount and doesnt introduce much error
|
||||||
#over small distances
|
#over small distances
|
||||||
|
|
|
@ -9,8 +9,14 @@ dbscan_eps = snakemake.params["dbscan_eps"]
|
||||||
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
|
||||||
threshold_static = snakemake.params["threshold_static"]
|
threshold_static = snakemake.params["threshold_static"]
|
||||||
maximum_gap_allowed = snakemake.params["maximum_gap_allowed"]
|
maximum_gap_allowed = snakemake.params["maximum_gap_allowed"]
|
||||||
|
minutes_data_used = snakemake.params["minutes_data_used"]
|
||||||
|
|
||||||
location_features = location_features.merge(base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static,maximum_gap_allowed), on="local_date", how="outer")
|
if(minutes_data_used):
|
||||||
|
requested_features.append("minutesdataused")
|
||||||
|
|
||||||
|
base_features = base_location_features(location_data, day_segment, requested_features, dbscan_eps, dbscan_minsamples,threshold_static,maximum_gap_allowed)
|
||||||
|
|
||||||
|
location_features = location_features.merge(base_features, on="local_date", how="outer")
|
||||||
|
|
||||||
assert len(requested_features) + 1 == location_features.shape[1], "The number of features in the output dataframe (=" + str(location_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your location feature extraction functions"
|
assert len(requested_features) + 1 == location_features.shape[1], "The number of features in the output dataframe (=" + str(location_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your location feature extraction functions"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue