Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes
parent
80522e6b7f
commit
a3fb718aea
|
@ -217,7 +217,8 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
|||
if provider == "BARNETT":
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_barnett_daily.csv", pid=config["PIDS"]))
|
||||
if provider == "DORYAB":
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
|
||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||
|
|
|
@ -241,12 +241,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy equal to or higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: False
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 100 # meters
|
||||
DBSCAN_MINSAMPLES: 5
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -262,7 +262,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -18,7 +18,7 @@ The following is a list of the sensors that testing is currently available.
|
|||
| Phone Conversation | RAPIDS | Y | Y | Y |
|
||||
| Phone Data Yield | RAPIDS | Y | Y | Y |
|
||||
| Phone Light | RAPIDS | Y | Y | Y |
|
||||
| Phone Locations | Doryab | N | N | N |
|
||||
| Phone Locations | Doryab | Y | Y | Y |
|
||||
| Phone Locations | Barnett | N | N | N |
|
||||
| Phone Messages | RAPIDS | Y | Y | Y |
|
||||
| Phone Screen | RAPIDS | Y | Y | Y |
|
||||
|
|
|
@ -23,7 +23,7 @@ def get_barnett_daily(wildcards):
|
|||
|
||||
def get_locations_python_input(wildcards):
|
||||
if wildcards.provider_key.upper() == "DORYAB":
|
||||
return "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv"
|
||||
return "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes_resampled_with_datetime.csv"
|
||||
else:
|
||||
return "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
|
||||
|
||||
|
|
|
@ -382,7 +382,7 @@ rule phone_locations_add_doryab_extra_columns:
|
|||
params:
|
||||
provider = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv"
|
||||
"data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes.csv"
|
||||
script:
|
||||
"../src/features/phone_locations/doryab/add_doryab_extra_columns.py"
|
||||
|
||||
|
|
|
@ -98,7 +98,8 @@ rule process_phone_locations_types:
|
|||
params:
|
||||
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
|
||||
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
|
||||
locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"]
|
||||
locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"],
|
||||
accuracy_limit = config["PHONE_LOCATIONS"]["ACCURACY_LIMIT"]
|
||||
output:
|
||||
"data/interim/{pid}/phone_locations_processed.csv"
|
||||
script:
|
||||
|
|
|
@ -6,9 +6,10 @@ library(tidyr)
|
|||
consecutive_threshold <- snakemake@params[["consecutive_threshold"]]
|
||||
time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
|
||||
locations_to_use <- snakemake@params[["locations_to_use"]]
|
||||
accuracy_limit <- snakemake@params[["accuracy_limit"]]
|
||||
|
||||
locations <- read.csv(snakemake@input[["locations"]]) %>%
|
||||
filter(double_latitude != 0 & double_longitude != 0) %>%
|
||||
filter(double_latitude != 0 & double_longitude != 0 & accuracy < accuracy_limit) %>%
|
||||
drop_na(double_longitude, double_latitude) %>%
|
||||
group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp
|
||||
filter(accuracy == min(accuracy, na.rm=TRUE)) %>%
|
||||
|
@ -63,7 +64,7 @@ if(locations_to_use == "ALL"){
|
|||
# you can think of consecutive_threshold as the period a location row is valid for
|
||||
mutate(limit = pmin(lead(timestamp, default = 9999999999999) - 1, limit + (1000 * 60 * consecutive_threshold)),
|
||||
n_resample = (limit - timestamp)%/%60001,
|
||||
n_resample = if_else(n_resample == 0, 1, n_resample)) %>%
|
||||
n_resample = n_resample + 1) %>%
|
||||
drop_na(double_longitude, double_latitude) %>%
|
||||
uncount(weights = n_resample, .id = "id") %>%
|
||||
mutate(provider = if_else(id > 1, "resampled", provider),
|
||||
|
|
|
@ -3,33 +3,28 @@ from statistics import mode
|
|||
import warnings
|
||||
|
||||
def barnett_daily_features(snakemake):
|
||||
accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
|
||||
location_data = pd.read_csv(snakemake.input["sensor_data"])
|
||||
accuracy_limit = snakemake.params["provider"]["ACCURACY_LIMIT"]
|
||||
|
||||
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
|
||||
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
|
||||
segment_regex = ".*#{},{}".format(datetime_start_regex, datetime_end_regex)
|
||||
location_data = location_data[location_data["assigned_segments"].str.match(segment_regex)]
|
||||
loc_daily_data_len = len(location_data)
|
||||
|
||||
location_data.query("accuracy < @accuracy_limit", inplace=True)
|
||||
|
||||
features_to_compute = ["local_date", "hometime", "disttravelled", "rog", "maxdiam", "maxhomedist", "siglocsvisited", "avgflightlen", "stdflightlen", "avgflightdur", "stdflightdur", "probpause", "siglocentropy", "minsmissing", "circdnrtn", "wkenddayrtn", "minutes_data_used"]
|
||||
|
||||
nrows = len(location_data)
|
||||
if loc_daily_data_len == 0:
|
||||
warnings.warn("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). This participant does not have location data or it spans less than 24 hours.")
|
||||
location_features = pd.DataFrame(columns=features_to_compute)
|
||||
elif nrows == 0:
|
||||
warnings.warn("Barnett's location features cannot be computed because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))
|
||||
location_features = pd.DataFrame(columns=features_to_compute)
|
||||
location_features = pd.DataFrame(columns=features_to_compute)
|
||||
if len(location_data) == 0:
|
||||
warnings.warn("Barnett's location features cannot be computed because the input data is empty.")
|
||||
else:
|
||||
location_minutes_used = location_data.groupby(["local_date", "local_hour"])[["local_minute"]].nunique().reset_index().groupby("local_date").sum()[["local_minute"]].rename(columns={"local_minute": "minutes_data_used"})
|
||||
timezone = mode(location_data["local_timezone"].values)
|
||||
location_df = location_data[["timestamp", "double_latitude", "double_longitude", "double_altitude", "accuracy"]]
|
||||
location_df.rename(columns={"double_latitude": "latitude", "double_longitude": "longitude", "double_altitude": "altitude"})
|
||||
output_mobility = run_barnett_features_for_rapids(location_df, accuracy_limit=accuracy_limit, timezone=timezone) #make local_date as the index for the output_mobility dataframe
|
||||
location_features = output_mobility.merge(location_minutes_used, on="local_date", how="left")
|
||||
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
|
||||
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
|
||||
segment_regex = ".*#{},{}".format(datetime_start_regex, datetime_end_regex)
|
||||
location_data = location_data[location_data["assigned_segments"].str.match(segment_regex)]
|
||||
|
||||
if len(location_data) == 0:
|
||||
warnings.warn("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59).")
|
||||
else:
|
||||
location_minutes_used = location_data.groupby(["local_date", "local_hour"])[["local_minute"]].nunique().reset_index().groupby("local_date").sum()[["local_minute"]].rename(columns={"local_minute": "minutes_data_used"})
|
||||
timezone = mode(location_data["local_timezone"].values)
|
||||
location_df = location_data[["timestamp", "double_latitude", "double_longitude", "double_altitude", "accuracy"]]
|
||||
location_df.rename(columns={"double_latitude": "latitude", "double_longitude": "longitude", "double_altitude": "altitude"})
|
||||
output_mobility = run_barnett_features_for_rapids(location_df, accuracy_limit=accuracy_limit, timezone=timezone) #make local_date as the index for the output_mobility dataframe
|
||||
location_features = output_mobility.merge(location_minutes_used, on="local_date", how="left")
|
||||
|
||||
location_features.reset_index(inplace=True)
|
||||
location_features.to_csv(snakemake.output[0], index=False)
|
||||
|
|
|
@ -106,7 +106,6 @@ def infer_home_location(location_data, clustering_algorithm, hyperparameters, st
|
|||
location_data = pd.read_csv(snakemake.input["sensor_input"])
|
||||
provider = snakemake.params["provider"]
|
||||
|
||||
accuracy_limit = provider["ACCURACY_LIMIT"]
|
||||
maximum_row_gap = provider["MAXIMUM_ROW_GAP"]
|
||||
dbscan_eps = provider["DBSCAN_EPS"]
|
||||
dbscan_minsamples = provider["DBSCAN_MINSAMPLES"]
|
||||
|
@ -116,12 +115,6 @@ cluster_on = provider["CLUSTER_ON"]
|
|||
strategy = provider["INFER_HOME_LOCATION_STRATEGY"]
|
||||
days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
|
||||
|
||||
rows_before_accuracy_filter = len(location_data)
|
||||
location_data = location_data[location_data["accuracy"] < accuracy_limit]
|
||||
|
||||
if rows_before_accuracy_filter > 0 and len(location_data) == 0:
|
||||
warnings.warn("Cannot compute Doryab location features because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))
|
||||
|
||||
if not location_data.timestamp.is_monotonic:
|
||||
location_data.sort_values(by=["timestamp"], inplace=True)
|
||||
|
||||
|
@ -133,8 +126,12 @@ location_data = mark_as_stationary(location_data, threshold_static)
|
|||
hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)
|
||||
location_data_with_doryab_columns = infer_home_location(location_data, clustering_algorithm, hyperparameters, strategy, days_threshold)
|
||||
|
||||
selected_columns = ["local_timezone", "device_id", "start_timestamp", "end_timestamp", "provider", "double_latitude", "double_longitude", "distance", "speed", "is_stationary", "distance_from_home", "home_label"]
|
||||
if cluster_on == "PARTICIPANT_DATASET":
|
||||
location_data_with_doryab_columns = cluster(location_data_with_doryab_columns, clustering_algorithm, **hyperparameters)
|
||||
selected_columns.append("cluster_label")
|
||||
|
||||
location_data_with_doryab_columns.to_csv(snakemake.output[0], index=False)
|
||||
|
||||
# Prepare for episodes
|
||||
location_data_with_doryab_columns = location_data_with_doryab_columns.rename(columns={"timestamp": "start_timestamp"})
|
||||
location_data_with_doryab_columns["end_timestamp"] = (location_data_with_doryab_columns["start_timestamp"] + location_data_with_doryab_columns["duration_in_seconds"] * 1000 - 1).astype(int)
|
||||
location_data_with_doryab_columns[selected_columns].to_csv(snakemake.output[0], index=False)
|
||||
|
|
|
@ -26,14 +26,14 @@ def meters_to_degrees(distance):
|
|||
# Relabel clusters: -1 denotes the outliers (insignificant or rarely visited locations), 1 denotes the most visited significant location, 2 denotes the 2nd most significant location,...
|
||||
def label(location_data):
|
||||
|
||||
# Exclude outliers (cluster_label = -1) while counting number of locations in a cluster
|
||||
label2count = pd.DataFrame({"count": location_data["cluster_label"].replace(-1, np.nan).value_counts(ascending=False, sort=True)})
|
||||
# Add the row number as the new cluster label since value_counts() will order it by default
|
||||
label2count["new_cluster_label"] = np.arange(len(label2count)) + 1
|
||||
# Exclude outliers (cluster_label = -1) while calculating the total duration of locations in a cluster
|
||||
label2duration = location_data[["cluster_label", "duration"]].replace(-1, np.nan).groupby("cluster_label")[["duration"]].sum().sort_values(by=["duration"], ascending=False)
|
||||
# Add the row number as the new cluster label
|
||||
label2duration["new_cluster_label"] = np.arange(len(label2duration)) + 1
|
||||
# Still use -1 to denote the outliers
|
||||
label2count.loc[-1, "new_cluster_label"] = -1
|
||||
label2duration.loc[-1, "new_cluster_label"] = -1
|
||||
# Merge the new cluster label with the original location data
|
||||
location_data = location_data.merge(label2count[["new_cluster_label"]], left_on="cluster_label", right_index=True, how="left")
|
||||
location_data = location_data.merge(label2duration[["new_cluster_label"]], left_on="cluster_label", right_index=True, how="left")
|
||||
|
||||
del location_data["cluster_label"]
|
||||
location_data.rename(columns={"new_cluster_label": "cluster_label"}, inplace=True)
|
||||
|
@ -54,24 +54,28 @@ def cluster(location_data, clustering_algorithm, **kwargs):
|
|||
if location_data.empty:
|
||||
return pd.DataFrame(columns=location_data.columns.tolist() + ["is_stationary", "cluster_label"])
|
||||
|
||||
if "duration" not in location_data.columns:
|
||||
# Convert second to minute
|
||||
location_data = location_data.assign(duration=location_data["duration_in_seconds"] / 60)
|
||||
|
||||
# Only keep stationary samples for clustering
|
||||
stationary_data = location_data[location_data["is_stationary"] == 1][["double_latitude", "double_longitude", "is_stationary"]]
|
||||
stationary_data = location_data[location_data["is_stationary"] == 1][["double_latitude", "double_longitude", "duration"]]
|
||||
|
||||
# Remove duplicates and apply sample_weight (only available for DBSCAN currently) to reduce memory usage
|
||||
stationary_data_dedup = stationary_data.groupby(["double_latitude", "double_longitude", "is_stationary"]).size().reset_index()
|
||||
lat_lon = stationary_data_dedup[["double_latitude", "double_longitude"]].values
|
||||
stationary_data_dedup = stationary_data.groupby(["double_latitude", "double_longitude"])[["duration"]].sum().reset_index()
|
||||
lat_lon_dedup = stationary_data_dedup[["double_latitude", "double_longitude"]].values
|
||||
|
||||
if stationary_data_dedup.shape[0] < kwargs["min_samples"]:
|
||||
cluster_results = np.array([-1] * stationary_data_dedup.shape[0])
|
||||
elif clustering_algorithm == "DBSCAN":
|
||||
clusterer = DBSCAN(**kwargs)
|
||||
cluster_results = clusterer.fit_predict(lat_lon, sample_weight=stationary_data_dedup[0])
|
||||
cluster_results = clusterer.fit_predict(lat_lon_dedup, sample_weight=stationary_data_dedup["duration"])
|
||||
else: # OPTICS
|
||||
clusterer = OPTICS(**kwargs)
|
||||
cluster_results = clusterer.fit_predict(lat_lon)
|
||||
cluster_results = clusterer.fit_predict(lat_lon_dedup)
|
||||
|
||||
# Add cluster labels
|
||||
stationary_data_dedup["cluster_label"] = cluster_results
|
||||
location_data_with_labels = label(location_data.merge(stationary_data_dedup[["double_latitude", "double_longitude", "is_stationary", "cluster_label"]], how="left", on=["double_latitude", "double_longitude", "is_stationary"]))
|
||||
location_data_with_labels = label(location_data.merge(stationary_data_dedup[["double_latitude", "double_longitude", "cluster_label"]], how="left", on=["double_latitude", "double_longitude"]))
|
||||
|
||||
return location_data_with_labels
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
from datetime import time
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from phone_locations.doryab.doryab_clustering import haversine, create_clustering_hyperparameters, cluster
|
||||
|
@ -9,7 +10,7 @@ def apply_cluster_strategy(location_data, time_segment, clustering_algorithm, db
|
|||
hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)
|
||||
|
||||
if cluster_on == "PARTICIPANT_DATASET":
|
||||
# clusters are created in cluster_accross_participant_dataset.py script
|
||||
# clusters are created in add_doryab_extra_columns.py script
|
||||
location_data = filter_data_by_segment(location_data, time_segment)
|
||||
elif cluster_on == "TIME_SEGMENT":
|
||||
location_data = filter_data_by_segment(location_data, time_segment)
|
||||
|
@ -21,28 +22,59 @@ def apply_cluster_strategy(location_data, time_segment, clustering_algorithm, db
|
|||
location_data_per_group = cluster(instance_data, clustering_algorithm, **hyperparameters)
|
||||
location_data_clusters = pd.concat([location_data_per_group, location_data_clusters])
|
||||
location_data = location_data_clusters
|
||||
|
||||
return location_data
|
||||
|
||||
def variance_and_logvariance_features(location_data, location_features):
    """Add duration-weighted location variance features for every time segment.

    For each ``local_segment`` the latitude and longitude are averaged using
    ``duration`` as the weight, and the weighted squared deviations of both
    coordinates are summed and divided by ``sum(duration) * 60 - 1``.

    Parameters
    ----------
    location_data : pandas.DataFrame
        Must contain ``local_segment``, ``double_latitude``,
        ``double_longitude`` and ``duration`` columns. It is NOT modified:
        all intermediate columns live on a private copy.
        NOTE(review): ``duration`` is presumably in minutes (hence the
        ``* 60``) -- confirm against the episode-chunking step.
    location_features : pandas.DataFrame
        Feature table indexed by ``local_segment``; it receives the new
        columns in place.

    Returns
    -------
    pandas.DataFrame
        ``location_features`` with ``locationvariance`` (NaN replaced by 0)
        and ``loglocationvariance`` (log10 of the variance, NaN where the
        variance is 0).
    """
    # Work on a copy so the helper columns never leak into the caller's frame.
    scratch = location_data[["local_segment", "double_latitude", "double_longitude", "duration"]].copy()

    # Total duration of the segment, repeated on every row of that segment.
    segment_duration_per_row = scratch.groupby("local_segment")["duration"].transform("sum")

    for coordinate in ("double_latitude", "double_longitude"):
        scratch["weighted_value"] = scratch[coordinate] * scratch["duration"]
        weighted_mean = scratch.groupby("local_segment")["weighted_value"].transform("sum") / segment_duration_per_row
        # Weighted squared deviation of each row from its segment's weighted mean.
        scratch[coordinate + "_for_wvar"] = (scratch[coordinate] - weighted_mean) ** 2 * scratch["duration"] * 60

    grouped = scratch.groupby("local_segment")
    weighted_squares = grouped["double_latitude_for_wvar"].sum() + grouped["double_longitude_for_wvar"].sum()
    location_features["locationvariance"] = (weighted_squares / (grouped["duration"].sum() * 60 - 1)).fillna(0)

    # log10(0) is expected for segments without movement; it becomes NaN below,
    # so silence the divide-by-zero warning instead of emitting it per run.
    with np.errstate(divide="ignore"):
        log_variance = np.log10(location_features["locationvariance"])
    location_features["loglocationvariance"] = log_variance.replace(-np.inf, np.nan)

    return location_features
|
||||
|
||||
def distance_and_speed_features(moving_data):
|
||||
|
||||
distance_and_speed = moving_data[["local_segment", "distance"]].groupby(["local_segment"]).sum().rename(columns={"distance": "totaldistance"})
|
||||
|
||||
moving_data_grouped = moving_data[["local_segment", "speed"]].groupby(["local_segment"])
|
||||
distance_and_speed["avgspeed"] = moving_data_grouped["speed"].mean()
|
||||
distance_and_speed["varspeed"] = moving_data_grouped["speed"].var()
|
||||
moving_data_grouped = moving_data.groupby(["local_segment"])
|
||||
|
||||
moving_data["speed_X_duration"] = moving_data["speed"] * moving_data["duration"]
|
||||
distance_and_speed["avgspeed"] = moving_data_grouped["speed_X_duration"].sum() / moving_data_grouped["duration"].sum()
|
||||
|
||||
moving_data["speed_wavg"] = moving_data_grouped["speed_X_duration"].transform("sum") / moving_data_grouped["duration"].transform("sum")
|
||||
moving_data["speed_for_wvar"] = (moving_data["speed"] - moving_data["speed_wavg"]) ** 2 * moving_data["duration"] * 60
|
||||
distance_and_speed["varspeed"] = moving_data_grouped["speed_for_wvar"].sum() / (moving_data_grouped["duration"].sum() * 60 - 1)
|
||||
|
||||
return distance_and_speed
|
||||
|
||||
def radius_of_gyration(location_data):
|
||||
|
||||
if location_data.empty:
|
||||
return np.nan
|
||||
|
||||
# define a lambda function to compute the weighted mean for each cluster
|
||||
weighted_mean = lambda x: np.average(x, weights=location_data.loc[x.index, "duration"])
|
||||
|
||||
# center is the centroid of the places visited during a segment instance, not the home location
|
||||
clusters = location_data.groupby(["local_segment", "cluster_label"]).agg(
|
||||
double_latitude=("double_latitude", "mean"),
|
||||
double_longitude=("double_longitude", "mean"),
|
||||
time_in_a_cluster=("duration_in_seconds", "sum")
|
||||
double_latitude=("double_latitude", weighted_mean),
|
||||
double_longitude=("double_longitude", weighted_mean),
|
||||
time_in_a_cluster=("duration", "sum")
|
||||
).reset_index()
|
||||
|
||||
clusters[["centroid_double_latitude", "centroid_double_longitude"]] = clusters.groupby(["local_segment"], sort=False)[["double_latitude", "double_longitude"]].transform("mean")
|
||||
# redefine the lambda function to compute the weighted mean across clusters
|
||||
weighted_mean = lambda x: np.average(x, weights=clusters.loc[x.index, "time_in_a_cluster"])
|
||||
|
||||
clusters[["centroid_double_latitude", "centroid_double_longitude"]] = clusters.groupby(["local_segment"], sort=False)[["double_latitude", "double_longitude"]].transform(weighted_mean)
|
||||
clusters["distance_squared"] = haversine(clusters["double_longitude"], clusters["double_latitude"], clusters["centroid_double_longitude"], clusters["centroid_double_latitude"]) ** 2
|
||||
|
||||
clusters["distance_squared_X_time_in_a_cluster"] = clusters["distance_squared"] * clusters["time_in_a_cluster"]
|
||||
|
@ -57,24 +89,23 @@ def cluster_stay(x, stay_at_clusters, cluster_n):
|
|||
|
||||
def stay_at_topn_clusters(location_data):
|
||||
|
||||
stay_at_clusters = location_data[["local_segment", "cluster_label", "duration_in_seconds"]].groupby(["local_segment", "cluster_label"], sort=True).sum().reset_index()
|
||||
stay_at_clusters["duration_in_minutes"] = stay_at_clusters["duration_in_seconds"] / 60
|
||||
stay_at_clusters = location_data[["local_segment", "cluster_label", "duration"]].groupby(["local_segment", "cluster_label"], sort=True).sum().reset_index()
|
||||
|
||||
stay_at_clusters_features = stay_at_clusters.groupby(["local_segment"]).agg(
|
||||
timeattop1location=("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 1)),
|
||||
timeattop2location=("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 2)),
|
||||
timeattop3location=("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 3)),
|
||||
maxlengthstayatclusters=("duration_in_minutes", "max"),
|
||||
minlengthstayatclusters=("duration_in_minutes", "min"),
|
||||
avglengthstayatclusters=("duration_in_minutes", "mean"),
|
||||
stdlengthstayatclusters=("duration_in_minutes", "std")
|
||||
timeattop1location=("duration", lambda x: cluster_stay(x, stay_at_clusters, 1)),
|
||||
timeattop2location=("duration", lambda x: cluster_stay(x, stay_at_clusters, 2)),
|
||||
timeattop3location=("duration", lambda x: cluster_stay(x, stay_at_clusters, 3)),
|
||||
maxlengthstayatclusters=("duration", "max"),
|
||||
minlengthstayatclusters=("duration", "min"),
|
||||
avglengthstayatclusters=("duration", "mean"),
|
||||
stdlengthstayatclusters=("duration", "std")
|
||||
).fillna(0)
|
||||
|
||||
return stay_at_clusters_features
|
||||
|
||||
def location_entropy(location_data):
|
||||
|
||||
location_data = location_data.groupby(["local_segment", "cluster_label"])[["duration_in_seconds"]].sum().reset_index().rename(columns={"duration_in_seconds": "cluster_duration"})
|
||||
location_data = location_data.groupby(["local_segment", "cluster_label"])[["duration"]].sum().reset_index().rename(columns={"duration": "cluster_duration"})
|
||||
location_data["all_clusters_duration"] = location_data.groupby(["local_segment"])["cluster_duration"].transform("sum")
|
||||
location_data["plogp"] = (location_data["cluster_duration"] / location_data["all_clusters_duration"]).apply(lambda x: x * np.log(x))
|
||||
|
||||
|
@ -111,18 +142,21 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
|
|||
return pd.DataFrame(columns=["local_segment"] + features_to_compute)
|
||||
location_features = pd.DataFrame()
|
||||
|
||||
location_features["minutesdataused"] = location_data.drop_duplicates(subset=["local_segment", "local_date", "local_hour", "local_minute"])[["local_segment", "local_minute"]].groupby(["local_segment"])["local_minute"].count()
|
||||
# update distance after chunk_episodes() function
|
||||
location_data["distance"] = location_data["speed"] * (location_data["duration"] / 60) * 1000 # in meters
|
||||
|
||||
location_features["minutesdataused"] = location_data[["local_segment", "duration"]].groupby(["local_segment"])["duration"].sum()
|
||||
|
||||
# variance features
|
||||
location_features["locationvariance"] = location_data.groupby(["local_segment"])["double_latitude"].var() + location_data.groupby(["local_segment"])["double_longitude"].var()
|
||||
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, np.nan)
|
||||
location_features = variance_and_logvariance_features(location_data, location_features)
|
||||
|
||||
# distance and speed features
|
||||
moving_data = location_data[location_data["is_stationary"] == 0]
|
||||
moving_data = location_data[location_data["is_stationary"] == 0].copy()
|
||||
location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True)
|
||||
location_features[["totaldistance", "avgspeed", "varspeed"]] = location_features[["totaldistance", "avgspeed", "varspeed"]].fillna(0)
|
||||
|
||||
# stationary features
|
||||
stationary_data = location_data[location_data["is_stationary"] == 1]
|
||||
stationary_data = location_data[location_data["is_stationary"] == 1].copy()
|
||||
stationary_data_without_outliers = stationary_data[stationary_data["cluster_label"] != -1]
|
||||
|
||||
location_features["numberofsignificantplaces"] = stationary_data_without_outliers.groupby(["local_segment"])["cluster_label"].nunique()
|
||||
|
@ -134,19 +168,20 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
|
|||
location_features = location_features.merge(stay_at_topn_clusters(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
|
||||
|
||||
# moving to static ratio
|
||||
static_time = stationary_data.groupby(["local_segment"])["duration_in_seconds"].sum()
|
||||
total_time = location_data.groupby(["local_segment"])["duration_in_seconds"].sum()
|
||||
static_time = stationary_data.groupby(["local_segment"])["duration"].sum()
|
||||
total_time = location_data.groupby(["local_segment"])["duration"].sum()
|
||||
location_features["movingtostaticratio"] = static_time / total_time
|
||||
|
||||
# outliers time percent
|
||||
outliers_time = stationary_data[stationary_data["cluster_label"] == -1].groupby(["local_segment"])["duration_in_seconds"].sum()
|
||||
location_features["outlierstimepercent"] = outliers_time / static_time
|
||||
outliers_time = stationary_data[stationary_data["cluster_label"] == -1].groupby(["local_segment"])["duration"].sum()
|
||||
location_features["outlierstimepercent"] = (outliers_time / static_time).fillna(0)
|
||||
|
||||
# entropy features
|
||||
location_features = location_features.merge(location_entropy(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
|
||||
|
||||
# time at home
|
||||
location_features["timeathome"] = stationary_data[stationary_data["distance_from_home"] <= radius_from_home].groupby(["local_segment"])["duration_in_seconds"].sum() / 60
|
||||
stationary_data["time_at_home"] = stationary_data.apply(lambda row: row["duration"] if row["distance_from_home"] <= radius_from_home else 0, axis=1)
|
||||
location_features["timeathome"] = stationary_data[["local_segment", "time_at_home"]].groupby(["local_segment"])["time_at_home"].sum()
|
||||
|
||||
# home label
|
||||
location_features["homelabel"] = stationary_data[["local_segment", "home_label"]].groupby(["local_segment"]).agg(lambda x: pd.Series.mode(x)[0])
|
||||
|
|
|
@ -71,7 +71,7 @@ def chunk_episodes(sensor_episodes):
|
|||
# Merge episodes
|
||||
cols_for_groupby = [col for col in sensor_episodes.columns if col not in ["timestamps_segment", "timestamp", "assigned_segments", "start_datetime", "end_datetime", "start_timestamp", "end_timestamp", "duration", "chunked_start_timestamp", "chunked_end_timestamp"]]
|
||||
|
||||
sensor_episodes_grouped = sensor_episodes.groupby(by=cols_for_groupby, sort=False)
|
||||
sensor_episodes_grouped = sensor_episodes.groupby(by=cols_for_groupby, sort=False, dropna=False)
|
||||
merged_sensor_episodes = sensor_episodes_grouped[["duration"]].sum()
|
||||
|
||||
merged_sensor_episodes["start_timestamp"] = sensor_episodes_grouped["chunked_start_timestamp"].first()
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,3 +1,3 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_timeathome","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avgspeed","phone_locations_doryab_locationvariance"
|
||||
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003
|
||||
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeathome","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeattop1location","phone_locations_doryab_varspeed","phone_locations_doryab_totaldistance","phone_locations_doryab_locationentropy","phone_locations_doryab_avgspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_stdlengthstayatclusters"
|
||||
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",0.346573590279973,1,5.9999,6268829.80447871,3.69084399356179,2,5.9999,1,5.9999,5.9999,0,0,0,0.693147180559945,0,0,5.9999,4907.31564827534,5.9999,0
|
||||
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",0.346573590279973,1,5.9999,6268829.80447871,3.69084399356179,2,5.9999,1,5.9999,5.9999,0,0,0,0.693147180559945,0,0,5.9999,4907.31564827534,5.9999,0
|
||||
|
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,8 +1,8 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop3location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeathome","phone_locations_doryab_locationentropy","phone_locations_doryab_radiusgyration","phone_locations_doryab_totaldistance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avgspeed","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters"
|
||||
"thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59","thirtyminutes0017","2020-03-08 08:30:00","2020-03-08 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6
|
||||
"thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59","thirtyminutes0017","2020-11-01 08:30:00","2020-11-01 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6
|
||||
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
|
||||
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
|
||||
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
|
||||
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
|
||||
"thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59","thirtyminutes0047","2020-03-07 23:30:00","2020-03-07 23:59:59",3.8799999999484e-11,0,6,0,6,6,-10.4111682744116,1,6,0,0,NA,NA,0,0,1,0,NA,NA,6
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_varspeed","phone_locations_doryab_locationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_timeattop3location","phone_locations_doryab_loglocationvariance"
|
||||
"thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59","thirtyminutes0017","2020-03-08 08:30:00","2020-03-08 08:59:59",5.9999,0,0,0,0,0,1,5.9999,0,0,0,0,8.91364939388538e-13,0,1,5.9999,0,0,5.9999,-12.0499444522888
|
||||
"thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59","thirtyminutes0017","2020-11-01 08:30:00","2020-11-01 08:59:59",5.9999,0,0,0,0,0,1,5.9999,0,0,0,0,8.91364939388538e-13,0,1,5.9999,0,0,5.9999,-12.0499444522888
|
||||
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",1.99996666666667,0,0,0,0,0,1,1.99996666666667,0,0,0,0,2.52100874371982e-13,0,1,1.99996666666667,0,1.99996666666667,0,-12.5984256480393
|
||||
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",1.99996666666667,0,0,0,0,0,1,1.99996666666667,0,0,0,0,2.52100874371982e-13,0,1,1.99996666666667,0,1.99996666666667,0,-12.5984256480393
|
||||
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",3.99993333333333,0,0,0,0,0,1,3.99993333333333,0,0,0,0,6.90376613695811e-13,0,1,3.99993333333333,0,3.99993333333333,0,-12.1609139286412
|
||||
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",3.99993333333333,0,0,0,0,0,1,3.99993333333333,0,0,0,0,6.90376613695811e-13,0,1,3.99993333333333,0,3.99993333333333,0,-12.1609139286412
|
||||
"thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59","thirtyminutes0047","2020-03-07 23:30:00","2020-03-07 23:59:59",5.9999,0,0,0,0,0,1,5.9999,5.9999,0,0,0,3.24233998346206e-11,5.9999,1,5.9999,0,0,0,-10.4891414480933
|
||||
|
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,16 +1,16 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_timeattop3location","phone_locations_doryab_avgspeed","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_radiusgyration","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_locationvariance"
|
||||
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",6,0.693147180559945,NA,6,NA,6,NA,1,6416036.08057409,1,0,NA,3.73564149377632,0,6,2,6,6,0.346573590279973,5440.53356226669
|
||||
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003
|
||||
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
|
||||
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
|
||||
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
|
||||
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
|
||||
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003
|
||||
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_avgspeed","phone_locations_doryab_totaldistance","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeattop2location","phone_locations_doryab_radiusgyration","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_locationvariance","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeathome"
|
||||
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",5.9999,2,3.70821638665005,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,0,5.9999,6416036.08292984,1,5107.5942179249,5.9999,5.9999
|
||||
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",0,2,3.69084399356179,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,5.9999,5.9999,6268829.80447871,1,4907.31564827534,5.9999,5.9999
|
||||
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
|
||||
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
|
||||
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
|
||||
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
|
||||
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",0,2,3.69084399356179,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,5.9999,5.9999,6268829.80447871,1,4907.31564827534,5.9999,5.9999
|
||||
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
|
||||
|
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,3 +1,3 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_timeattop3location","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome"
|
||||
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6
|
||||
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_loglocationvariance","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeathome","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop2location","phone_locations_doryab_radiusgyration","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_varspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberofsignificantplaces"
|
||||
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",0,3.69084399356179,5.9999,5.9999,0,5.9999,5.9999,0,0.346573590279973,1,0.693147180559945,5.9999,5.9999,6268829.80447871,0,4907.31564827534,0,0,1,2
|
||||
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",0,3.69084399356179,5.9999,5.9999,0,5.9999,5.9999,0,0.346573590279973,1,0.693147180559945,5.9999,5.9999,6268829.80447871,0,4907.31564827534,0,0,1,2
|
||||
|
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,9 +1,9 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_locationvariance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_radiusgyration","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_timeattop3location"
|
||||
"thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59","thirtyminutes0007","2020-03-08 03:30:00","2020-03-08 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0
|
||||
"thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59","thirtyminutes0007","2020-11-01 03:30:00","2020-11-01 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0
|
||||
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3
|
||||
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3
|
||||
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3
|
||||
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3
|
||||
"thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59","thirtyminutes0023","2020-03-08 11:30:00","2020-03-08 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0
|
||||
"thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59","thirtyminutes0023","2020-11-01 11:30:00","2020-11-01 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop1location","phone_locations_doryab_locationvariance","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_avgspeed","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop2location","phone_locations_doryab_timeattop3location","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_normalizedlocationentropy"
|
||||
"thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59","thirtyminutes0007","2020-03-08 03:30:00","2020-03-08 03:59:59",0,5.9999,0,-10.4891414480933,1,0,5.9999,0,5.9999,0,0,3.24233998346206e-11,1,0,5.9999,0,5.9999,0,0,0
|
||||
"thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59","thirtyminutes0007","2020-11-01 03:30:00","2020-11-01 03:59:59",0,5.9999,0,-10.4891414480933,1,0,5.9999,0,5.9999,0,0,3.24233998346206e-11,1,0,5.9999,0,5.9999,0,0,0
|
||||
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",0,1.99996666666667,0,-12.5984256480393,1,0,1.99996666666667,0,0,0,1.99996666666667,2.52100874371982e-13,1,0,1.99996666666667,0,0,0,0,0
|
||||
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",0,1.99996666666667,0,-12.5984256480393,1,0,1.99996666666667,0,0,0,1.99996666666667,2.52100874371982e-13,1,0,1.99996666666667,0,0,0,0,0
|
||||
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",0,3.99993333333333,0,-12.1609139286412,1,0,3.99993333333333,0,0,0,3.99993333333333,6.90376613695811e-13,1,0,3.99993333333333,0,0,0,0,0
|
||||
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",0,3.99993333333333,0,-12.1609139286412,1,0,3.99993333333333,0,0,0,3.99993333333333,6.90376613695811e-13,1,0,3.99993333333333,0,0,0,0,0
|
||||
"thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59","thirtyminutes0023","2020-03-08 11:30:00","2020-03-08 11:59:59",0,5.9999,0,-12.0499444522888,1,0,5.9999,0,0,0,0,8.91364939388538e-13,1,0,5.9999,0,0,5.9999,0,0
|
||||
"thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59","thirtyminutes0023","2020-11-01 11:30:00","2020-11-01 11:59:59",0,5.9999,0,-12.0499444522888,1,0,5.9999,0,0,0,0,8.91364939388538e-13,1,0,5.9999,0,0,5.9999,0,0
|
||||
|
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1,16 +1,16 @@
|
|||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_radiusgyration","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_locationvariance","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_totaldistance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_locationentropy","phone_locations_doryab_loglocationvariance","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed"
|
||||
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
|
||||
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
|
||||
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
|
||||
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
|
||||
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
|
||||
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
|
||||
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
|
||||
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
|
||||
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
|
||||
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_avgspeed","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeattop3location","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop1location","phone_locations_doryab_loglocationvariance","phone_locations_doryab_varspeed"
|
||||
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
|
||||
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
|
||||
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
|
||||
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
|
||||
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
|
||||
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,5.9999,0,5.9999,0,0,1,0,-12.0499444522888,0
|
||||
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
|
||||
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,5.9999,0,5.9999,0,0,1,0,-12.0499444522888,0
|
||||
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
|
||||
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
|
||||
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
|
||||
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
|
||||
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
|
||||
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
|
||||
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
|
||||
|
|
|
File diff suppressed because one or more lines are too long
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -244,12 +244,12 @@ PHONE_LOCATIONS:
|
|||
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
|
||||
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
|
||||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
|
||||
PROVIDERS:
|
||||
DORYAB:
|
||||
COMPUTE: True
|
||||
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
DBSCAN_EPS: 10 # meters
|
||||
DBSCAN_MINSAMPLES: 3
|
||||
THRESHOLD_STATIC : 1 # km/h
|
||||
|
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
|
|||
BARNETT:
|
||||
COMPUTE: False
|
||||
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
|
||||
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
|
||||
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
|
||||
SRC_SCRIPT: src/features/phone_locations/barnett/main.R
|
||||
|
|
|
@ -626,7 +626,7 @@ properties:
|
|||
|
||||
PHONE_LOCATIONS:
|
||||
type: object
|
||||
required: [CONTAINER, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, PROVIDERS]
|
||||
required: [CONTAINER, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION, ACCURACY_LIMIT, PROVIDERS]
|
||||
properties:
|
||||
CONTAINER:
|
||||
type: string
|
||||
|
@ -639,6 +639,9 @@ properties:
|
|||
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION:
|
||||
type: integer
|
||||
exclusiveMinimum: 0
|
||||
ACCURACY_LIMIT:
|
||||
type: integer
|
||||
exclusiveMinimum: 0
|
||||
PROVIDERS:
|
||||
type: ["null", object]
|
||||
properties:
|
||||
|
@ -652,9 +655,6 @@ properties:
|
|||
items:
|
||||
type: string
|
||||
enum: [locationvariance,loglocationvariance,totaldistance,avgspeed,varspeed,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,avglengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy,timeathome,homelabel]
|
||||
ACCURACY_LIMIT:
|
||||
type: integer
|
||||
exclusiveMinimum: 0
|
||||
DBSCAN_EPS:
|
||||
type: integer
|
||||
exclusiveMinimum: 0
|
||||
|
@ -695,9 +695,6 @@ properties:
|
|||
items:
|
||||
type: string
|
||||
enum: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
|
||||
ACCURACY_LIMIT:
|
||||
type: integer
|
||||
exclusiveMinimum: 0
|
||||
IF_MULTIPLE_TIMEZONES:
|
||||
type: string
|
||||
enum: [USE_MOST_COMMON]
|
||||
|
|
Loading…
Reference in New Issue