Merge branch 'feature/phone_locations_refactor' into develop

pull/167/head
Meng Li 2021-09-23 18:22:11 -04:00
commit c7e8777a6e
39 changed files with 358 additions and 334 deletions

View File

@ -217,7 +217,8 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
if provider == "BARNETT": if provider == "BARNETT":
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_barnett_daily.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_barnett_daily.csv", pid=config["PIDS"]))
if provider == "DORYAB": if provider == "DORYAB":
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))

View File

@ -241,12 +241,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy equal to or higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: False
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 100 # meters DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -262,7 +262,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -1,6 +1,8 @@
# Change Log # Change Log
## v1.6.0 ## v1.6.0
- Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events - Refactor PHONE_CALLS RAPIDS provider to compute features based on call episodes or events
- Refactor PHONE_LOCATIONS DORYAB provider to compute features based on location episodes
- Temporarily revert PHONE_LOCATIONS BARNETT provider to use R script
## v1.5.0 ## v1.5.0
- Update Barnett location features with faster Python implementation - Update Barnett location features with faster Python implementation
- Fix rounding bug in data yield features - Fix rounding bug in data yield features

View File

@ -18,7 +18,7 @@ The following is a list of the sensors that testing is currently available.
| Phone Conversation | RAPIDS | Y | Y | Y | | Phone Conversation | RAPIDS | Y | Y | Y |
| Phone Data Yield | RAPIDS | Y | Y | Y | | Phone Data Yield | RAPIDS | Y | Y | Y |
| Phone Light | RAPIDS | Y | Y | Y | | Phone Light | RAPIDS | Y | Y | Y |
| Phone Locations | Doryab | N | N | N | | Phone Locations | Doryab | Y | Y | Y |
| Phone Locations | Barnett | N | N | N | | Phone Locations | Barnett | N | N | N |
| Phone Messages | RAPIDS | Y | Y | Y | | Phone Messages | RAPIDS | Y | Y | Y |
| Phone Screen | RAPIDS | Y | Y | Y | | Phone Screen | RAPIDS | Y | Y | Y |

View File

@ -23,7 +23,7 @@ def get_barnett_daily(wildcards):
def get_locations_python_input(wildcards): def get_locations_python_input(wildcards):
if wildcards.provider_key.upper() == "DORYAB": if wildcards.provider_key.upper() == "DORYAB":
return "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv" return "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes_resampled_with_datetime.csv"
else: else:
return "data/interim/{pid}/phone_locations_processed_with_datetime.csv" return "data/interim/{pid}/phone_locations_processed_with_datetime.csv"

View File

@ -382,7 +382,7 @@ rule phone_locations_add_doryab_extra_columns:
params: params:
provider = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"] provider = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]
output: output:
"data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv" "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns_episodes.csv"
script: script:
"../src/features/phone_locations/doryab/add_doryab_extra_columns.py" "../src/features/phone_locations/doryab/add_doryab_extra_columns.py"
@ -408,7 +408,7 @@ rule phone_locations_barnett_daily_features:
output: output:
"data/interim/{pid}/phone_locations_barnett_daily.csv" "data/interim/{pid}/phone_locations_barnett_daily.csv"
script: script:
"../src/features/phone_locations/barnett/daily_features.py" "../src/features/phone_locations/barnett/daily_features.R"
rule phone_locations_r_features: rule phone_locations_r_features:
input: input:

View File

@ -98,7 +98,8 @@ rule process_phone_locations_types:
params: params:
consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"], consecutive_threshold = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD"],
time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"], time_since_valid_location = config["PHONE_LOCATIONS"]["FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION"],
locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] locations_to_use = config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"],
accuracy_limit = config["PHONE_LOCATIONS"]["ACCURACY_LIMIT"]
output: output:
"data/interim/{pid}/phone_locations_processed.csv" "data/interim/{pid}/phone_locations_processed.csv"
script: script:

View File

@ -6,9 +6,10 @@ library(tidyr)
consecutive_threshold <- snakemake@params[["consecutive_threshold"]] consecutive_threshold <- snakemake@params[["consecutive_threshold"]]
time_since_valid_location <- snakemake@params[["time_since_valid_location"]] time_since_valid_location <- snakemake@params[["time_since_valid_location"]]
locations_to_use <- snakemake@params[["locations_to_use"]] locations_to_use <- snakemake@params[["locations_to_use"]]
accuracy_limit <- snakemake@params[["accuracy_limit"]]
locations <- read.csv(snakemake@input[["locations"]]) %>% locations <- read.csv(snakemake@input[["locations"]]) %>%
filter(double_latitude != 0 & double_longitude != 0) %>% filter(double_latitude != 0 & double_longitude != 0 & accuracy < accuracy_limit) %>%
drop_na(double_longitude, double_latitude) %>% drop_na(double_longitude, double_latitude) %>%
group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp group_by(timestamp) %>% # keep only the row with the best accuracy if two or more have the same timestamp
filter(accuracy == min(accuracy, na.rm=TRUE)) %>% filter(accuracy == min(accuracy, na.rm=TRUE)) %>%
@ -63,7 +64,7 @@ if(locations_to_use == "ALL"){
# you can think of consecutive_threshold as the period a location row is valid for # you can think of consecutive_threshold as the period a location row is valid for
mutate(limit = pmin(lead(timestamp, default = 9999999999999) - 1, limit + (1000 * 60 * consecutive_threshold)), mutate(limit = pmin(lead(timestamp, default = 9999999999999) - 1, limit + (1000 * 60 * consecutive_threshold)),
n_resample = (limit - timestamp)%/%60001, n_resample = (limit - timestamp)%/%60001,
n_resample = if_else(n_resample == 0, 1, n_resample)) %>% n_resample = n_resample + 1) %>%
drop_na(double_longitude, double_latitude) %>% drop_na(double_longitude, double_latitude) %>%
uncount(weights = n_resample, .id = "id") %>% uncount(weights = n_resample, .id = "id") %>%
mutate(provider = if_else(id > 1, "resampled", provider), mutate(provider = if_else(id > 1, "resampled", provider),

View File

@ -20,16 +20,14 @@ barnett_daily_features <- function(snakemake){
location_features <- NULL location_features <- NULL
location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE) location <- read.csv(snakemake@input[["sensor_data"]], stringsAsFactors = FALSE)
segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE) segment_labels <- read.csv(snakemake@input[["time_segments_labels"]], stringsAsFactors = FALSE)
accuracy_limit <- snakemake@params[["provider"]][["ACCURACY_LIMIT"]] accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00" datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59" datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
location <- location %>% location <- location %>%
filter(accuracy < accuracy_limit) %>%
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*"))) mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){ if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:", warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
"\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)), "\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
"\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2) "\nLocation data time span in days: ", round((max(location$timestamp) - min(location$timestamp)) / 86400000, 2)
) )

View File

@ -3,33 +3,28 @@ from statistics import mode
import warnings import warnings
def barnett_daily_features(snakemake): def barnett_daily_features(snakemake):
accuracy_limit = 999999999 # We filter rows based on accuracy in src/data/process_location_types.R script
location_data = pd.read_csv(snakemake.input["sensor_data"]) location_data = pd.read_csv(snakemake.input["sensor_data"])
accuracy_limit = snakemake.params["provider"]["ACCURACY_LIMIT"]
datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
segment_regex = ".*#{},{}".format(datetime_start_regex, datetime_end_regex)
location_data = location_data[location_data["assigned_segments"].str.match(segment_regex)]
loc_daily_data_len = len(location_data)
location_data.query("accuracy < @accuracy_limit", inplace=True)
features_to_compute = ["local_date", "hometime", "disttravelled", "rog", "maxdiam", "maxhomedist", "siglocsvisited", "avgflightlen", "stdflightlen", "avgflightdur", "stdflightdur", "probpause", "siglocentropy", "minsmissing", "circdnrtn", "wkenddayrtn", "minutes_data_used"] features_to_compute = ["local_date", "hometime", "disttravelled", "rog", "maxdiam", "maxhomedist", "siglocsvisited", "avgflightlen", "stdflightlen", "avgflightdur", "stdflightdur", "probpause", "siglocentropy", "minsmissing", "circdnrtn", "wkenddayrtn", "minutes_data_used"]
nrows = len(location_data) location_features = pd.DataFrame(columns=features_to_compute)
if loc_daily_data_len == 0: if len(location_data) == 0:
warnings.warn("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). This participant does not have location data or it spans less than 24 hours.") warnings.warn("Barnett's location features cannot be computed because the input data is empty.")
location_features = pd.DataFrame(columns=features_to_compute)
elif nrows == 0:
warnings.warn("Barnett's location features cannot be computed because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))
location_features = pd.DataFrame(columns=features_to_compute)
else: else:
location_minutes_used = location_data.groupby(["local_date", "local_hour"])[["local_minute"]].nunique().reset_index().groupby("local_date").sum()[["local_minute"]].rename(columns={"local_minute": "minutes_data_used"}) datetime_start_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 00:00:00"
timezone = mode(location_data["local_timezone"].values) datetime_end_regex = "[0-9]{4}[\\-|\\/][0-9]{2}[\\-|\\/][0-9]{2} 23:59:59"
location_df = location_data[["timestamp", "double_latitude", "double_longitude", "double_altitude", "accuracy"]] segment_regex = ".*#{},{}".format(datetime_start_regex, datetime_end_regex)
location_df.rename(columns={"double_latitude": "latitude", "double_longitude": "longitude", "double_altitude": "altitude"}) location_data = location_data[location_data["assigned_segments"].str.match(segment_regex)]
output_mobility = run_barnett_features_for_rapids(location_df, accuracy_limit=accuracy_limit, timezone=timezone) #make local_date as the index for the output_mobility dataframe
location_features = output_mobility.merge(location_minutes_used, on="local_date", how="left") if len(location_data) == 0:
warnings.warn("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59).")
else:
location_minutes_used = location_data.groupby(["local_date", "local_hour"])[["local_minute"]].nunique().reset_index().groupby("local_date").sum()[["local_minute"]].rename(columns={"local_minute": "minutes_data_used"})
timezone = mode(location_data["local_timezone"].values)
location_df = location_data[["timestamp", "double_latitude", "double_longitude", "double_altitude", "accuracy"]]
location_df.rename(columns={"double_latitude": "latitude", "double_longitude": "longitude", "double_altitude": "altitude"})
output_mobility = run_barnett_features_for_rapids(location_df, accuracy_limit=accuracy_limit, timezone=timezone) #make local_date as the index for the output_mobility dataframe
location_features = output_mobility.merge(location_minutes_used, on="local_date", how="left")
location_features.reset_index(inplace=True) location_features.reset_index(inplace=True)
location_features.to_csv(snakemake.output[0], index=False) location_features.to_csv(snakemake.output[0], index=False)

View File

@ -106,7 +106,6 @@ def infer_home_location(location_data, clustering_algorithm, hyperparameters, st
location_data = pd.read_csv(snakemake.input["sensor_input"]) location_data = pd.read_csv(snakemake.input["sensor_input"])
provider = snakemake.params["provider"] provider = snakemake.params["provider"]
accuracy_limit = provider["ACCURACY_LIMIT"]
maximum_row_gap = provider["MAXIMUM_ROW_GAP"] maximum_row_gap = provider["MAXIMUM_ROW_GAP"]
dbscan_eps = provider["DBSCAN_EPS"] dbscan_eps = provider["DBSCAN_EPS"]
dbscan_minsamples = provider["DBSCAN_MINSAMPLES"] dbscan_minsamples = provider["DBSCAN_MINSAMPLES"]
@ -116,12 +115,6 @@ cluster_on = provider["CLUSTER_ON"]
strategy = provider["INFER_HOME_LOCATION_STRATEGY"] strategy = provider["INFER_HOME_LOCATION_STRATEGY"]
days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"] days_threshold = provider["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
rows_before_accuracy_filter = len(location_data)
location_data = location_data[location_data["accuracy"] < accuracy_limit]
if rows_before_accuracy_filter > 0 and len(location_data) == 0:
warnings.warn("Cannot compute Doryab location features because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))
if not location_data.timestamp.is_monotonic: if not location_data.timestamp.is_monotonic:
location_data.sort_values(by=["timestamp"], inplace=True) location_data.sort_values(by=["timestamp"], inplace=True)
@ -133,8 +126,12 @@ location_data = mark_as_stationary(location_data, threshold_static)
hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples) hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)
location_data_with_doryab_columns = infer_home_location(location_data, clustering_algorithm, hyperparameters, strategy, days_threshold) location_data_with_doryab_columns = infer_home_location(location_data, clustering_algorithm, hyperparameters, strategy, days_threshold)
selected_columns = ["local_timezone", "device_id", "start_timestamp", "end_timestamp", "provider", "double_latitude", "double_longitude", "distance", "speed", "is_stationary", "distance_from_home", "home_label"]
if cluster_on == "PARTICIPANT_DATASET": if cluster_on == "PARTICIPANT_DATASET":
location_data_with_doryab_columns = cluster(location_data_with_doryab_columns, clustering_algorithm, **hyperparameters) location_data_with_doryab_columns = cluster(location_data_with_doryab_columns, clustering_algorithm, **hyperparameters)
selected_columns.append("cluster_label")
location_data_with_doryab_columns.to_csv(snakemake.output[0], index=False) # Prepare for episodes
location_data_with_doryab_columns = location_data_with_doryab_columns.rename(columns={"timestamp": "start_timestamp"})
location_data_with_doryab_columns["end_timestamp"] = (location_data_with_doryab_columns["start_timestamp"] + location_data_with_doryab_columns["duration_in_seconds"] * 1000 - 1).astype(int)
location_data_with_doryab_columns[selected_columns].to_csv(snakemake.output[0], index=False)

View File

@ -26,14 +26,14 @@ def meters_to_degrees(distance):
# Relabel clusters: -1 denotes the outliers (insignificant or rarely visited locations), 1 denotes the most visited significant location, 2 denotes the 2nd most significant location,... # Relabel clusters: -1 denotes the outliers (insignificant or rarely visited locations), 1 denotes the most visited significant location, 2 denotes the 2nd most significant location,...
def label(location_data): def label(location_data):
# Exclude outliers (cluster_label = -1) while counting number of locations in a cluster # Exclude outliers (cluster_label = -1) while calculating the total duration of locations in a cluster
label2count = pd.DataFrame({"count": location_data["cluster_label"].replace(-1, np.nan).value_counts(ascending=False, sort=True)}) label2duration = location_data[["cluster_label", "duration"]].replace(-1, np.nan).groupby("cluster_label")[["duration"]].sum().sort_values(by=["duration"], ascending=False)
# Add the row number as the new cluster label since value_counts() will order it by default # Add the row number as the new cluster label
label2count["new_cluster_label"] = np.arange(len(label2count)) + 1 label2duration["new_cluster_label"] = np.arange(len(label2duration)) + 1
# Still use -1 to denote the outliers # Still use -1 to denote the outliers
label2count.loc[-1, "new_cluster_label"] = -1 label2duration.loc[-1, "new_cluster_label"] = -1
# Merge the new cluster label with the original location data # Merge the new cluster label with the original location data
location_data = location_data.merge(label2count[["new_cluster_label"]], left_on="cluster_label", right_index=True, how="left") location_data = location_data.merge(label2duration[["new_cluster_label"]], left_on="cluster_label", right_index=True, how="left")
del location_data["cluster_label"] del location_data["cluster_label"]
location_data.rename(columns={"new_cluster_label": "cluster_label"}, inplace=True) location_data.rename(columns={"new_cluster_label": "cluster_label"}, inplace=True)
@ -54,24 +54,28 @@ def cluster(location_data, clustering_algorithm, **kwargs):
if location_data.empty: if location_data.empty:
return pd.DataFrame(columns=location_data.columns.tolist() + ["is_stationary", "cluster_label"]) return pd.DataFrame(columns=location_data.columns.tolist() + ["is_stationary", "cluster_label"])
if "duration" not in location_data.columns:
# Convert second to minute
location_data = location_data.assign(duration=location_data["duration_in_seconds"] / 60)
# Only keep stationary samples for clustering # Only keep stationary samples for clustering
stationary_data = location_data[location_data["is_stationary"] == 1][["double_latitude", "double_longitude", "is_stationary"]] stationary_data = location_data[location_data["is_stationary"] == 1][["double_latitude", "double_longitude", "duration"]]
# Remove duplicates and apply sample_weight (only available for DBSCAN currently) to reduce memory usage # Remove duplicates and apply sample_weight (only available for DBSCAN currently) to reduce memory usage
stationary_data_dedup = stationary_data.groupby(["double_latitude", "double_longitude", "is_stationary"]).size().reset_index() stationary_data_dedup = stationary_data.groupby(["double_latitude", "double_longitude"])[["duration"]].sum().reset_index()
lat_lon = stationary_data_dedup[["double_latitude", "double_longitude"]].values lat_lon_dedup = stationary_data_dedup[["double_latitude", "double_longitude"]].values
if stationary_data_dedup.shape[0] < kwargs["min_samples"]: if stationary_data_dedup.shape[0] < kwargs["min_samples"]:
cluster_results = np.array([-1] * stationary_data_dedup.shape[0]) cluster_results = np.array([-1] * stationary_data_dedup.shape[0])
elif clustering_algorithm == "DBSCAN": elif clustering_algorithm == "DBSCAN":
clusterer = DBSCAN(**kwargs) clusterer = DBSCAN(**kwargs)
cluster_results = clusterer.fit_predict(lat_lon, sample_weight=stationary_data_dedup[0]) cluster_results = clusterer.fit_predict(lat_lon_dedup, sample_weight=stationary_data_dedup["duration"])
else: # OPTICS else: # OPTICS
clusterer = OPTICS(**kwargs) clusterer = OPTICS(**kwargs)
cluster_results = clusterer.fit_predict(lat_lon) cluster_results = clusterer.fit_predict(lat_lon_dedup)
# Add cluster labels # Add cluster labels
stationary_data_dedup["cluster_label"] = cluster_results stationary_data_dedup["cluster_label"] = cluster_results
location_data_with_labels = label(location_data.merge(stationary_data_dedup[["double_latitude", "double_longitude", "is_stationary", "cluster_label"]], how="left", on=["double_latitude", "double_longitude", "is_stationary"])) location_data_with_labels = label(location_data.merge(stationary_data_dedup[["double_latitude", "double_longitude", "cluster_label"]], how="left", on=["double_latitude", "double_longitude"]))
return location_data_with_labels return location_data_with_labels

View File

@ -1,3 +1,4 @@
from datetime import time
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from phone_locations.doryab.doryab_clustering import haversine, create_clustering_hyperparameters, cluster from phone_locations.doryab.doryab_clustering import haversine, create_clustering_hyperparameters, cluster
@ -9,7 +10,7 @@ def apply_cluster_strategy(location_data, time_segment, clustering_algorithm, db
hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples) hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)
if cluster_on == "PARTICIPANT_DATASET": if cluster_on == "PARTICIPANT_DATASET":
# clusters are created in cluster_accross_participant_dataset.py script # clusters are created in add_doryab_extra_columns.py script
location_data = filter_data_by_segment(location_data, time_segment) location_data = filter_data_by_segment(location_data, time_segment)
elif cluster_on == "TIME_SEGMENT": elif cluster_on == "TIME_SEGMENT":
location_data = filter_data_by_segment(location_data, time_segment) location_data = filter_data_by_segment(location_data, time_segment)
@ -21,28 +22,59 @@ def apply_cluster_strategy(location_data, time_segment, clustering_algorithm, db
location_data_per_group = cluster(instance_data, clustering_algorithm, **hyperparameters) location_data_per_group = cluster(instance_data, clustering_algorithm, **hyperparameters)
location_data_clusters = pd.concat([location_data_per_group, location_data_clusters]) location_data_clusters = pd.concat([location_data_per_group, location_data_clusters])
location_data = location_data_clusters location_data = location_data_clusters
return location_data return location_data
def variance_and_logvariance_features(location_data, location_features):
location_data_grouped = location_data.groupby("local_segment")
location_data["latitude_X_duration"] = location_data["double_latitude"] * location_data["duration"]
location_data["longitude_X_duration"] = location_data["double_longitude"] * location_data["duration"]
location_data["latitude_wavg"] = location_data_grouped["latitude_X_duration"].transform("sum") / location_data_grouped["duration"].transform("sum")
location_data["longitude_wavg"] = location_data_grouped["longitude_X_duration"].transform("sum") / location_data_grouped["duration"].transform("sum")
location_data["latitude_for_wvar"] = (location_data["double_latitude"] - location_data["latitude_wavg"]) ** 2 * location_data["duration"] * 60
location_data["longitude_for_wvar"] = (location_data["double_longitude"] - location_data["longitude_wavg"]) ** 2 * location_data["duration"] * 60
location_features["locationvariance"] = ((location_data_grouped["latitude_for_wvar"].sum() + location_data_grouped["longitude_for_wvar"].sum()) / (location_data_grouped["duration"].sum() * 60 - 1)).fillna(0)
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, np.nan)
return location_features
def distance_and_speed_features(moving_data): def distance_and_speed_features(moving_data):
distance_and_speed = moving_data[["local_segment", "distance"]].groupby(["local_segment"]).sum().rename(columns={"distance": "totaldistance"}) distance_and_speed = moving_data[["local_segment", "distance"]].groupby(["local_segment"]).sum().rename(columns={"distance": "totaldistance"})
moving_data_grouped = moving_data[["local_segment", "speed"]].groupby(["local_segment"]) moving_data_grouped = moving_data.groupby(["local_segment"])
distance_and_speed["avgspeed"] = moving_data_grouped["speed"].mean()
distance_and_speed["varspeed"] = moving_data_grouped["speed"].var() moving_data["speed_X_duration"] = moving_data["speed"] * moving_data["duration"]
distance_and_speed["avgspeed"] = moving_data_grouped["speed_X_duration"].sum() / moving_data_grouped["duration"].sum()
moving_data["speed_wavg"] = moving_data_grouped["speed_X_duration"].transform("sum") / moving_data_grouped["duration"].transform("sum")
moving_data["speed_for_wvar"] = (moving_data["speed"] - moving_data["speed_wavg"]) ** 2 * moving_data["duration"] * 60
distance_and_speed["varspeed"] = moving_data_grouped["speed_for_wvar"].sum() / (moving_data_grouped["duration"].sum() * 60 - 1)
return distance_and_speed return distance_and_speed
def radius_of_gyration(location_data): def radius_of_gyration(location_data):
if location_data.empty:
return np.nan
# define a lambda function to compute the weighted mean for each cluster
weighted_mean = lambda x: np.average(x, weights=location_data.loc[x.index, "duration"])
# center is the centroid of the places visited during a segment instance, not the home location # center is the centroid of the places visited during a segment instance, not the home location
clusters = location_data.groupby(["local_segment", "cluster_label"]).agg( clusters = location_data.groupby(["local_segment", "cluster_label"]).agg(
double_latitude=("double_latitude", "mean"), double_latitude=("double_latitude", weighted_mean),
double_longitude=("double_longitude", "mean"), double_longitude=("double_longitude", weighted_mean),
time_in_a_cluster=("duration_in_seconds", "sum") time_in_a_cluster=("duration", "sum")
).reset_index() ).reset_index()
clusters[["centroid_double_latitude", "centroid_double_longitude"]] = clusters.groupby(["local_segment"], sort=False)[["double_latitude", "double_longitude"]].transform("mean") # redefine the lambda function to compute the weighted mean across clusters
weighted_mean = lambda x: np.average(x, weights=clusters.loc[x.index, "time_in_a_cluster"])
clusters[["centroid_double_latitude", "centroid_double_longitude"]] = clusters.groupby(["local_segment"], sort=False)[["double_latitude", "double_longitude"]].transform(weighted_mean)
clusters["distance_squared"] = haversine(clusters["double_longitude"], clusters["double_latitude"], clusters["centroid_double_longitude"], clusters["centroid_double_latitude"]) ** 2 clusters["distance_squared"] = haversine(clusters["double_longitude"], clusters["double_latitude"], clusters["centroid_double_longitude"], clusters["centroid_double_latitude"]) ** 2
clusters["distance_squared_X_time_in_a_cluster"] = clusters["distance_squared"] * clusters["time_in_a_cluster"] clusters["distance_squared_X_time_in_a_cluster"] = clusters["distance_squared"] * clusters["time_in_a_cluster"]
@ -57,24 +89,23 @@ def cluster_stay(x, stay_at_clusters, cluster_n):
def stay_at_topn_clusters(location_data):
    """Summarise, per time segment, how long was spent at each cluster.

    Durations are first summed per (local_segment, cluster_label) pair; the
    resulting per-cluster totals are then aggregated per local_segment.

    Parameters
    ----------
    location_data : pandas.DataFrame
        Must provide the columns "local_segment", "cluster_label" and
        "duration".
        NOTE(review): "duration" is presumably expressed in minutes -- confirm
        against the upstream episode-chunking step.

    Returns
    -------
    pandas.DataFrame
        Indexed by "local_segment" with the columns timeattop{1,2,3}location,
        maxlengthstayatclusters, minlengthstayatclusters,
        avglengthstayatclusters and stdlengthstayatclusters. Missing values
        (e.g. the standard deviation of a single cluster) are replaced by 0.
    """
    needed_columns = ["local_segment", "cluster_label", "duration"]
    per_cluster_totals = location_data[needed_columns].groupby(["local_segment", "cluster_label"], sort=True).sum()
    stay_at_clusters = per_cluster_totals.reset_index()

    def time_at_rank(rank):
        # cluster_stay is defined elsewhere in this module; it receives the
        # segment's durations, the full per-cluster table and the rank.
        # NOTE(review): presumably returns the time spent at the rank-th most
        # visited cluster -- confirm against cluster_stay.
        return lambda durations: cluster_stay(durations, stay_at_clusters, rank)

    # Insertion order of this mapping fixes the output column order.
    aggregations = {
        "timeattop1location": ("duration", time_at_rank(1)),
        "timeattop2location": ("duration", time_at_rank(2)),
        "timeattop3location": ("duration", time_at_rank(3)),
        "maxlengthstayatclusters": ("duration", "max"),
        "minlengthstayatclusters": ("duration", "min"),
        "avglengthstayatclusters": ("duration", "mean"),
        "stdlengthstayatclusters": ("duration", "std"),
    }
    per_segment = stay_at_clusters.groupby(["local_segment"]).agg(**aggregations)
    return per_segment.fillna(0)
def location_entropy(location_data): def location_entropy(location_data):
location_data = location_data.groupby(["local_segment", "cluster_label"])[["duration_in_seconds"]].sum().reset_index().rename(columns={"duration_in_seconds": "cluster_duration"}) location_data = location_data.groupby(["local_segment", "cluster_label"])[["duration"]].sum().reset_index().rename(columns={"duration": "cluster_duration"})
location_data["all_clusters_duration"] = location_data.groupby(["local_segment"])["cluster_duration"].transform("sum") location_data["all_clusters_duration"] = location_data.groupby(["local_segment"])["cluster_duration"].transform("sum")
location_data["plogp"] = (location_data["cluster_duration"] / location_data["all_clusters_duration"]).apply(lambda x: x * np.log(x)) location_data["plogp"] = (location_data["cluster_duration"] / location_data["all_clusters_duration"]).apply(lambda x: x * np.log(x))
@ -111,18 +142,21 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
return pd.DataFrame(columns=["local_segment"] + features_to_compute) return pd.DataFrame(columns=["local_segment"] + features_to_compute)
location_features = pd.DataFrame() location_features = pd.DataFrame()
location_features["minutesdataused"] = location_data.drop_duplicates(subset=["local_segment", "local_date", "local_hour", "local_minute"])[["local_segment", "local_minute"]].groupby(["local_segment"])["local_minute"].count() # update distance after chunk_episodes() function
location_data["distance"] = location_data["speed"] * (location_data["duration"] / 60) * 1000 # in meters
location_features["minutesdataused"] = location_data[["local_segment", "duration"]].groupby(["local_segment"])["duration"].sum()
# variance features # variance features
location_features["locationvariance"] = location_data.groupby(["local_segment"])["double_latitude"].var() + location_data.groupby(["local_segment"])["double_longitude"].var() location_features = variance_and_logvariance_features(location_data, location_features)
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, np.nan)
# distance and speed features # distance and speed features
moving_data = location_data[location_data["is_stationary"] == 0] moving_data = location_data[location_data["is_stationary"] == 0].copy()
location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True) location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True)
location_features[["totaldistance", "avgspeed", "varspeed"]] = location_features[["totaldistance", "avgspeed", "varspeed"]].fillna(0)
# stationary features # stationary features
stationary_data = location_data[location_data["is_stationary"] == 1] stationary_data = location_data[location_data["is_stationary"] == 1].copy()
stationary_data_without_outliers = stationary_data[stationary_data["cluster_label"] != -1] stationary_data_without_outliers = stationary_data[stationary_data["cluster_label"] != -1]
location_features["numberofsignificantplaces"] = stationary_data_without_outliers.groupby(["local_segment"])["cluster_label"].nunique() location_features["numberofsignificantplaces"] = stationary_data_without_outliers.groupby(["local_segment"])["cluster_label"].nunique()
@ -134,19 +168,20 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
location_features = location_features.merge(stay_at_topn_clusters(stationary_data_without_outliers), how="outer", left_index=True, right_index=True) location_features = location_features.merge(stay_at_topn_clusters(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
# moving to static ratio # moving to static ratio
static_time = stationary_data.groupby(["local_segment"])["duration_in_seconds"].sum() static_time = stationary_data.groupby(["local_segment"])["duration"].sum()
total_time = location_data.groupby(["local_segment"])["duration_in_seconds"].sum() total_time = location_data.groupby(["local_segment"])["duration"].sum()
location_features["movingtostaticratio"] = static_time / total_time location_features["movingtostaticratio"] = static_time / total_time
# outliers time percent # outliers time percent
outliers_time = stationary_data[stationary_data["cluster_label"] == -1].groupby(["local_segment"])["duration_in_seconds"].sum() outliers_time = stationary_data[stationary_data["cluster_label"] == -1].groupby(["local_segment"])["duration"].sum()
location_features["outlierstimepercent"] = outliers_time / static_time location_features["outlierstimepercent"] = (outliers_time / static_time).fillna(0)
# entropy features # entropy features
location_features = location_features.merge(location_entropy(stationary_data_without_outliers), how="outer", left_index=True, right_index=True) location_features = location_features.merge(location_entropy(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
# time at home # time at home
location_features["timeathome"] = stationary_data[stationary_data["distance_from_home"] <= radius_from_home].groupby(["local_segment"])["duration_in_seconds"].sum() / 60 stationary_data["time_at_home"] = stationary_data.apply(lambda row: row["duration"] if row["distance_from_home"] <= radius_from_home else 0, axis=1)
location_features["timeathome"] = stationary_data[["local_segment", "time_at_home"]].groupby(["local_segment"])["time_at_home"].sum()
# home label # home label
location_features["homelabel"] = stationary_data[["local_segment", "home_label"]].groupby(["local_segment"]).agg(lambda x: pd.Series.mode(x)[0]) location_features["homelabel"] = stationary_data[["local_segment", "home_label"]].groupby(["local_segment"]).agg(lambda x: pd.Series.mode(x)[0])

View File

@ -71,7 +71,7 @@ def chunk_episodes(sensor_episodes):
# Merge episodes # Merge episodes
cols_for_groupby = [col for col in sensor_episodes.columns if col not in ["timestamps_segment", "timestamp", "assigned_segments", "start_datetime", "end_datetime", "start_timestamp", "end_timestamp", "duration", "chunked_start_timestamp", "chunked_end_timestamp"]] cols_for_groupby = [col for col in sensor_episodes.columns if col not in ["timestamps_segment", "timestamp", "assigned_segments", "start_datetime", "end_datetime", "start_timestamp", "end_timestamp", "duration", "chunked_start_timestamp", "chunked_end_timestamp"]]
sensor_episodes_grouped = sensor_episodes.groupby(by=cols_for_groupby, sort=False) sensor_episodes_grouped = sensor_episodes.groupby(by=cols_for_groupby, sort=False, dropna=False)
merged_sensor_episodes = sensor_episodes_grouped[["duration"]].sum() merged_sensor_episodes = sensor_episodes_grouped[["duration"]].sum()
merged_sensor_episodes["start_timestamp"] = sensor_episodes_grouped["chunked_start_timestamp"].first() merged_sensor_episodes["start_timestamp"] = sensor_episodes_grouped["chunked_start_timestamp"].first()

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,3 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_timeathome","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avgspeed","phone_locations_doryab_locationvariance" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeathome","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeattop1location","phone_locations_doryab_varspeed","phone_locations_doryab_totaldistance","phone_locations_doryab_locationentropy","phone_locations_doryab_avgspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_stdlengthstayatclusters"
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003 "beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",0.346573590279973,1,5.9999,6268829.80447871,3.69084399356179,2,5.9999,1,5.9999,5.9999,0,0,0,0.693147180559945,0,0,5.9999,4907.31564827534,5.9999,0
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003 "beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",0.346573590279973,1,5.9999,6268829.80447871,3.69084399356179,2,5.9999,1,5.9999,5.9999,0,0,0,0.693147180559945,0,0,5.9999,4907.31564827534,5.9999,0

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_movingtostaticratio phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_radiusgyration phone_locations_doryab_loglocationvariance phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_timeathome phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_timeattop3location phone_locations_doryab_timeattop1location phone_locations_doryab_varspeed phone_locations_doryab_totaldistance phone_locations_doryab_locationentropy phone_locations_doryab_avgspeed phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_stdlengthstayatclusters
2 beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00 beforeMarchEvent 2020-03-07 16:00:00 2020-03-08 15:00:00 0.346573590279973 1 6 5.9999 6268829.80206745 6268829.80447871 3.71826910068082 3.69084399356179 2 6 5.9999 1 6 5.9999 0 5.9999 6 0 NA 0 NA 0 0.693147180559945 NA 0 NA 0 6 5.9999 5227.19980200003 4907.31564827534 6 5.9999 0
3 beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00 beforeNovemberEvent 2020-10-31 16:00:00 2020-11-01 13:00:00 0.346573590279973 1 6 5.9999 6268829.80206745 6268829.80447871 3.71826910068082 3.69084399356179 2 6 5.9999 1 6 5.9999 0 5.9999 6 0 NA 0 NA 0 0.693147180559945 NA 0 NA 0 6 5.9999 5227.19980200003 4907.31564827534 6 5.9999 0

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,8 +1,8 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop3location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeathome","phone_locations_doryab_locationentropy","phone_locations_doryab_radiusgyration","phone_locations_doryab_totaldistance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avgspeed","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_varspeed","phone_locations_doryab_locationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_timeattop3location","phone_locations_doryab_loglocationvariance"
"thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59","thirtyminutes0017","2020-03-08 08:30:00","2020-03-08 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6 "thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59","thirtyminutes0017","2020-03-08 08:30:00","2020-03-08 08:59:59",5.9999,0,0,0,0,0,1,5.9999,0,0,0,0,8.91364939388538e-13,0,1,5.9999,0,0,5.9999,-12.0499444522888
"thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59","thirtyminutes0017","2020-11-01 08:30:00","2020-11-01 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6 "thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59","thirtyminutes0017","2020-11-01 08:30:00","2020-11-01 08:59:59",5.9999,0,0,0,0,0,1,5.9999,0,0,0,0,8.91364939388538e-13,0,1,5.9999,0,0,5.9999,-12.0499444522888
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3 "thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",1.99996666666667,0,0,0,0,0,1,1.99996666666667,0,0,0,0,2.52100874371982e-13,0,1,1.99996666666667,0,1.99996666666667,0,-12.5984256480393
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3 "thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",1.99996666666667,0,0,0,0,0,1,1.99996666666667,0,0,0,0,2.52100874371982e-13,0,1,1.99996666666667,0,1.99996666666667,0,-12.5984256480393
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3 "thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",3.99993333333333,0,0,0,0,0,1,3.99993333333333,0,0,0,0,6.90376613695811e-13,0,1,3.99993333333333,0,3.99993333333333,0,-12.1609139286412
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3 "thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",3.99993333333333,0,0,0,0,0,1,3.99993333333333,0,0,0,0,6.90376613695811e-13,0,1,3.99993333333333,0,3.99993333333333,0,-12.1609139286412
"thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59","thirtyminutes0047","2020-03-07 23:30:00","2020-03-07 23:59:59",3.8799999999484e-11,0,6,0,6,6,-10.4111682744116,1,6,0,0,NA,NA,0,0,1,0,NA,NA,6 "thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59","thirtyminutes0047","2020-03-07 23:30:00","2020-03-07 23:59:59",5.9999,0,0,0,0,0,1,5.9999,5.9999,0,0,0,3.24233998346206e-11,5.9999,1,5.9999,0,0,0,-10.4891414480933

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_totaldistance phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_numberlocationtransitions phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_timeathome phone_locations_doryab_varspeed phone_locations_doryab_locationentropy phone_locations_doryab_outlierstimepercent phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_movingtostaticratio phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_radiusgyration phone_locations_doryab_timeattop1location phone_locations_doryab_timeattop3location phone_locations_doryab_loglocationvariance
2 thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59 thirtyminutes0017 2020-03-08 08:30:00 2020-03-08 08:59:59 6 5.9999 0 NA 0 NA 0 0 0 1 6 5.9999 NA 0 NA 0 0 NA 0 1.58333332533827e-12 8.91364939388538e-13 6 0 1 6 5.9999 0 0 0 5.9999 -11.8004276472878 -12.0499444522888
3 thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59 thirtyminutes0017 2020-11-01 08:30:00 2020-11-01 08:59:59 6 5.9999 0 NA 0 NA 0 0 0 1 6 5.9999 NA 0 NA 0 0 NA 0 1.58333332533827e-12 8.91364939388538e-13 6 0 1 6 5.9999 0 0 0 5.9999 -11.8004276472878 -12.0499444522888
4 thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59 thirtyminutes0021 2020-03-07 10:30:00 2020-03-07 10:59:59 3 1.99996666666667 0 NA 0 NA 0 0 0 1 3 1.99996666666667 NA 0 NA 0 0 NA 0 4.99999997475243e-13 2.52100874371982e-13 0 1 3 1.99996666666667 0 0 1.99996666666667 3 0 -12.301029997857 -12.5984256480393
5 thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59 thirtyminutes0021 2020-10-31 10:30:00 2020-10-31 10:59:59 3 1.99996666666667 0 NA 0 NA 0 0 0 1 3 1.99996666666667 NA 0 NA 0 0 NA 0 4.99999997475243e-13 2.52100874371982e-13 0 1 3 1.99996666666667 0 0 1.99996666666667 3 0 -12.301029997857 -12.5984256480393
6 thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59 thirtyminutes0022 2020-03-07 11:00:00 2020-03-07 11:29:59 3 3.99993333333333 0 NA 0 NA 0 0 0 1 3 3.99993333333333 NA 0 NA 0 0 NA 0 9.99999994950485e-13 6.90376613695811e-13 0 1 3 3.99993333333333 0 0 3.99993333333333 3 0 -12.000000002193 -12.1609139286412
7 thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59 thirtyminutes0022 2020-10-31 11:00:00 2020-10-31 11:29:59 3 3.99993333333333 0 NA 0 NA 0 0 0 1 3 3.99993333333333 NA 0 NA 0 0 NA 0 9.99999994950485e-13 6.90376613695811e-13 0 1 3 3.99993333333333 0 0 3.99993333333333 3 0 -12.000000002193 -12.1609139286412
8 thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59 thirtyminutes0047 2020-03-07 23:30:00 2020-03-07 23:59:59 6 5.9999 0 NA 0 NA 0 0 0 1 6 5.9999 6 5.9999 NA 0 0 NA 0 3.8799999999484e-11 3.24233998346206e-11 0 5.9999 1 6 5.9999 0 6 0 0 -10.4111682744116 -10.4891414480933

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,16 +1,16 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_timeattop3location","phone_locations_doryab_avgspeed","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_radiusgyration","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_locationvariance" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_avgspeed","phone_locations_doryab_totaldistance","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeattop2location","phone_locations_doryab_radiusgyration","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_locationvariance","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeathome"
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",6,0.693147180559945,NA,6,NA,6,NA,1,6416036.08057409,1,0,NA,3.73564149377632,0,6,2,6,6,0.346573590279973,5440.53356226669 "daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",5.9999,2,3.70821638665005,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,0,5.9999,6416036.08292984,1,5107.5942179249,5.9999,5.9999
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003 "daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",0,2,3.69084399356179,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,5.9999,5.9999,6268829.80447871,1,4907.31564827534,5.9999,5.9999
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989 "threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",0,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,5.9999,0,0,1,8.91364939388538e-13,5.9999,0
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12 "threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",5.9999,1,-12.0499444522888,0,5.9999,0,0,0,5.9999,0,0,0,0,0,0,0,1,8.91364939388538e-13,5.9999,0
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989 "threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989 "threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003 "threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",0,2,3.69084399356179,1,5.9999,0.346573590279973,0,0,5.9999,0,0,0,0.693147180559945,5.9999,5.9999,6268829.80447871,1,4907.31564827534,5.9999,5.9999
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989 "weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,3,4.1253555318677,2,5.9999,0.366204096222703,0,0,5.9999,0,0,0,1.09861228866811,5.9999,5.9999,8014514.68257681,1,13346.1355628909,5.9999,5.9999

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_timeattop1location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_loglocationvariance phone_locations_doryab_numberlocationtransitions phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_outlierstimepercent phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_varspeed phone_locations_doryab_avgspeed phone_locations_doryab_totaldistance phone_locations_doryab_locationentropy phone_locations_doryab_timeattop3location phone_locations_doryab_timeattop2location phone_locations_doryab_radiusgyration phone_locations_doryab_movingtostaticratio phone_locations_doryab_locationvariance phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_timeathome
2 daily#2020-03-07 00:00:00,2020-03-07 23:59:59 daily 2020-03-07 00:00:00 2020-03-07 23:59:59 6 5.9999 2 3.73564149377632 3.70821638665005 1 6 5.9999 0.346573590279973 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0.693147180559945 6 0 0 5.9999 6416036.08057409 6416036.08292984 1 5440.53356226669 5107.5942179249 6 5.9999 6 5.9999
3 daily#2020-03-08 00:00:00,2020-03-08 23:59:59 daily 2020-03-08 00:00:00 2020-03-08 23:59:59 0 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 0 5.9999 6 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
4 daily#2020-10-31 00:00:00,2020-10-31 23:59:59 daily 2020-10-31 00:00:00 2020-10-31 23:59:59 0 5.9999 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 6 0 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
5 daily#2020-11-01 00:00:00,2020-11-01 23:59:59 daily 2020-11-01 00:00:00 2020-11-01 23:59:59 6 0 2 3.71826910068082 3.69084399356179 1 6 5.9999 0.346573590279973 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0.693147180559945 0 5.9999 6 5.9999 6268829.80206745 6268829.80447871 1 5227.19980200003 4907.31564827534 6 5.9999 6 5.9999
6 morning#2020-03-07 06:00:00,2020-03-07 11:59:59 morning 2020-03-07 06:00:00 2020-03-07 11:59:59 0 5.9999 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 6 0 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
7 morning#2020-03-08 06:00:00,2020-03-08 11:59:59 morning 2020-03-08 06:00:00 2020-03-08 11:59:59 0 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 0 5.9999 6 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
8 morning#2020-10-31 06:00:00,2020-10-31 11:59:59 morning 2020-10-31 06:00:00 2020-10-31 11:59:59 0 5.9999 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 6 0 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
9 morning#2020-11-01 06:00:00,2020-11-01 11:59:59 morning 2020-11-01 06:00:00 2020-11-01 11:59:59 0 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 0 5.9999 6 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
10 threeday#2020-03-07 00:00:00,2020-03-09 23:59:59 threeday 2020-03-07 00:00:00 2020-03-09 23:59:59 6 5.9999 3 4.09019524373105 4.1253555318677 2 6 5.9999 0.366204096222703 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 1.09861228866811 6 5.9999 6 5.9999 8014514.68387131 8014514.68257681 1 12308.2198130989 13346.1355628909 6 5.9999 6 5.9999
11 threeday#2020-03-08 00:00:00,2020-03-10 23:59:59 threeday 2020-03-08 00:00:00 2020-03-10 23:59:59 0 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 0 5.9999 6 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
12 threeday#2020-10-29 00:00:00,2020-10-31 23:59:59 threeday 2020-10-29 00:00:00 2020-10-31 23:59:59 0 5.9999 1 -11.8004276472878 -12.0499444522888 0 6 5.9999 0 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0 6 0 0 0 1 1.58333332533827e-12 8.91364939388538e-13 6 5.9999 NA 0
13 threeday#2020-10-30 00:00:00,2020-11-01 23:59:59 threeday 2020-10-30 00:00:00 2020-11-01 23:59:59 6 5.9999 3 4.09019524373105 4.1253555318677 2 6 5.9999 0.366204096222703 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 1.09861228866811 6 5.9999 6 5.9999 8014514.68387131 8014514.68257681 1 12308.2198130989 13346.1355628909 6 5.9999 6 5.9999
14 threeday#2020-10-31 00:00:00,2020-11-02 23:59:59 threeday 2020-10-31 00:00:00 2020-11-02 23:59:59 6 5.9999 3 4.09019524373105 4.1253555318677 2 6 5.9999 0.366204096222703 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 1.09861228866811 6 5.9999 6 5.9999 8014514.68387131 8014514.68257681 1 12308.2198130989 13346.1355628909 6 5.9999 6 5.9999
15 threeday#2020-11-01 00:00:00,2020-11-03 23:59:59 threeday 2020-11-01 00:00:00 2020-11-03 23:59:59 6 0 2 3.71826910068082 3.69084399356179 1 6 5.9999 0.346573590279973 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 0.693147180559945 0 5.9999 6 5.9999 6268829.80206745 6268829.80447871 1 5227.19980200003 4907.31564827534 6 5.9999 6 5.9999
16 weekend#2020-10-30 00:00:00,2020-11-01 23:59:59 weekend 2020-10-30 00:00:00 2020-11-01 23:59:59 6 5.9999 3 4.09019524373105 4.1253555318677 2 6 5.9999 0.366204096222703 NA 0 0 6 5.9999 NA 0 NA 0 NA 0 1.09861228866811 6 5.9999 6 5.9999 8014514.68387131 8014514.68257681 1 12308.2198130989 13346.1355628909 6 5.9999 6 5.9999

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,3 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_timeattop3location","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_loglocationvariance","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_timeattop3location","phone_locations_doryab_timeathome","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop2location","phone_locations_doryab_radiusgyration","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_varspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberofsignificantplaces"
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6 "beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",0,3.69084399356179,5.9999,5.9999,0,5.9999,5.9999,0,0.346573590279973,1,0.693147180559945,5.9999,5.9999,6268829.80447871,0,4907.31564827534,0,0,1,2
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6 "beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",0,3.69084399356179,5.9999,5.9999,0,5.9999,5.9999,0,0.346573590279973,1,0.693147180559945,5.9999,5.9999,6268829.80447871,0,4907.31564827534,0,0,1,2

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_timeattop1location phone_locations_doryab_loglocationvariance phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_totaldistance phone_locations_doryab_timeattop3location phone_locations_doryab_timeathome phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_numberlocationtransitions phone_locations_doryab_locationentropy phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_timeattop2location phone_locations_doryab_radiusgyration phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_locationvariance phone_locations_doryab_varspeed phone_locations_doryab_outlierstimepercent phone_locations_doryab_movingtostaticratio phone_locations_doryab_numberofsignificantplaces
2 beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00 beforeMarchEvent 2020-03-07 16:00:00 2020-03-08 15:00:00 6 0 3.71826910068082 3.69084399356179 6 5.9999 6 5.9999 NA 0 0 5.9999 6 5.9999 NA 0 0.346573590279973 1 0.693147180559945 6 5.9999 6 5.9999 6268829.80206745 6268829.80447871 0 5227.19980200003 4907.31564827534 NA 0 NA 0 1 2
3 beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00 beforeNovemberEvent 2020-10-31 16:00:00 2020-11-01 13:00:00 6 0 3.71826910068082 3.69084399356179 6 5.9999 6 5.9999 NA 0 0 5.9999 6 5.9999 NA 0 0.346573590279973 1 0.693147180559945 6 5.9999 6 5.9999 6268829.80206745 6268829.80447871 0 5227.19980200003 4907.31564827534 NA 0 NA 0 1 2

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,9 +1,9 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_locationvariance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_radiusgyration","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_timeattop3location" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop1location","phone_locations_doryab_locationvariance","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_avgspeed","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop2location","phone_locations_doryab_timeattop3location","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_normalizedlocationentropy"
"thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59","thirtyminutes0007","2020-03-08 03:30:00","2020-03-08 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0 "thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59","thirtyminutes0007","2020-03-08 03:30:00","2020-03-08 03:59:59",0,5.9999,0,-10.4891414480933,1,0,5.9999,0,5.9999,0,0,3.24233998346206e-11,1,0,5.9999,0,5.9999,0,0,0
"thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59","thirtyminutes0007","2020-11-01 03:30:00","2020-11-01 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0 "thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59","thirtyminutes0007","2020-11-01 03:30:00","2020-11-01 03:59:59",0,5.9999,0,-10.4891414480933,1,0,5.9999,0,5.9999,0,0,3.24233998346206e-11,1,0,5.9999,0,5.9999,0,0,0
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3 "thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",0,1.99996666666667,0,-12.5984256480393,1,0,1.99996666666667,0,0,0,1.99996666666667,2.52100874371982e-13,1,0,1.99996666666667,0,0,0,0,0
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3 "thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",0,1.99996666666667,0,-12.5984256480393,1,0,1.99996666666667,0,0,0,1.99996666666667,2.52100874371982e-13,1,0,1.99996666666667,0,0,0,0,0
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3 "thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",0,3.99993333333333,0,-12.1609139286412,1,0,3.99993333333333,0,0,0,3.99993333333333,6.90376613695811e-13,1,0,3.99993333333333,0,0,0,0,0
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3 "thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",0,3.99993333333333,0,-12.1609139286412,1,0,3.99993333333333,0,0,0,3.99993333333333,6.90376613695811e-13,1,0,3.99993333333333,0,0,0,0,0
"thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59","thirtyminutes0023","2020-03-08 11:30:00","2020-03-08 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0 "thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59","thirtyminutes0023","2020-03-08 11:30:00","2020-03-08 11:59:59",0,5.9999,0,-12.0499444522888,1,0,5.9999,0,0,0,0,8.91364939388538e-13,1,0,5.9999,0,0,5.9999,0,0
"thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59","thirtyminutes0023","2020-11-01 11:30:00","2020-11-01 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0 "thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59","thirtyminutes0023","2020-11-01 11:30:00","2020-11-01 11:59:59",0,5.9999,0,-12.0499444522888,1,0,5.9999,0,0,0,0,8.91364939388538e-13,1,0,5.9999,0,0,5.9999,0,0

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_outlierstimepercent phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_numberlocationtransitions phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_locationentropy phone_locations_doryab_timeattop1location phone_locations_doryab_locationvariance phone_locations_doryab_movingtostaticratio phone_locations_doryab_avgspeed phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_varspeed phone_locations_doryab_timeattop2location phone_locations_doryab_timeattop3location phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_normalizedlocationentropy
2 thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59 thirtyminutes0007 2020-03-08 03:30:00 2020-03-08 03:59:59 NA 0 6 5.9999 NA 0 -10.4111682744116 -10.4891414480933 1 0 6 5.9999 0 6 5.9999 0 6 0 3.8799999999484e-11 3.24233998346206e-11 1 NA 0 6 5.9999 NA 0 0 5.9999 0 0 0
3 thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59 thirtyminutes0007 2020-11-01 03:30:00 2020-11-01 03:59:59 NA 0 6 5.9999 NA 0 -10.4111682744116 -10.4891414480933 1 0 6 5.9999 0 6 5.9999 0 6 0 3.8799999999484e-11 3.24233998346206e-11 1 NA 0 6 5.9999 NA 0 0 5.9999 0 0 0
4 thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59 thirtyminutes0021 2020-03-07 10:30:00 2020-03-07 10:59:59 NA 0 3 1.99996666666667 NA 0 -12.301029997857 -12.5984256480393 1 0 3 1.99996666666667 0 NA 0 0 0 1.99996666666667 4.99999997475243e-13 2.52100874371982e-13 1 NA 0 3 1.99996666666667 NA 0 0 3 0 0 0
5 thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59 thirtyminutes0021 2020-10-31 10:30:00 2020-10-31 10:59:59 NA 0 3 1.99996666666667 NA 0 -12.301029997857 -12.5984256480393 1 0 3 1.99996666666667 0 NA 0 0 0 1.99996666666667 4.99999997475243e-13 2.52100874371982e-13 1 NA 0 3 1.99996666666667 NA 0 0 3 0 0 0
6 thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59 thirtyminutes0022 2020-03-07 11:00:00 2020-03-07 11:29:59 NA 0 3 3.99993333333333 NA 0 -12.000000002193 -12.1609139286412 1 0 3 3.99993333333333 0 NA 0 0 0 3.99993333333333 9.99999994950485e-13 6.90376613695811e-13 1 NA 0 3 3.99993333333333 NA 0 0 3 0 0 0
7 thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59 thirtyminutes0022 2020-10-31 11:00:00 2020-10-31 11:29:59 NA 0 3 3.99993333333333 NA 0 -12.000000002193 -12.1609139286412 1 0 3 3.99993333333333 0 NA 0 0 0 3.99993333333333 9.99999994950485e-13 6.90376613695811e-13 1 NA 0 3 3.99993333333333 NA 0 0 3 0 0 0
8 thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59 thirtyminutes0023 2020-03-08 11:30:00 2020-03-08 11:59:59 NA 0 6 5.9999 NA 0 -11.8004276472878 -12.0499444522888 1 0 6 5.9999 0 NA 0 0 0 1.58333332533827e-12 8.91364939388538e-13 1 NA 0 6 5.9999 NA 0 6 0 0 5.9999 0 0
9 thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59 thirtyminutes0023 2020-11-01 11:30:00 2020-11-01 11:59:59 NA 0 6 5.9999 NA 0 -11.8004276472878 -12.0499444522888 1 0 6 5.9999 0 NA 0 0 0 1.58333332533827e-12 8.91364939388538e-13 1 NA 0 6 5.9999 NA 0 6 0 0 5.9999 0 0

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,16 +1,16 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_radiusgyration","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_locationvariance","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_totaldistance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_locationentropy","phone_locations_doryab_loglocationvariance","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed" "local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_avgspeed","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeattop3location","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop1location","phone_locations_doryab_loglocationvariance","phone_locations_doryab_varspeed"
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA "daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA "daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA "daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA "daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA "morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA "morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,5.9999,0,5.9999,0,0,1,0,-12.0499444522888,0
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA "morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA "morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,5.9999,0,5.9999,0,0,1,0,-12.0499444522888,0
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA "threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA "threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA "threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",5.9999,0,0,0,5.9999,8.91364939388538e-13,0,0,0,0,1,0,0,5.9999,0,0,1,5.9999,-12.0499444522888,0
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA "threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA "threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA "threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",5.9999,1,0,6268829.80447871,5.9999,4907.31564827534,0,0.693147180559945,5.9999,0.346573590279973,2,5.9999,0,5.9999,5.9999,0,1,0,3.69084399356179,0
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA "weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",5.9999,2,0,8014514.68257681,5.9999,13346.1355628909,0,1.09861228866811,5.9999,0.366204096222703,3,5.9999,0,5.9999,5.9999,0,1,5.9999,4.1253555318677,0

1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_numberlocationtransitions phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_radiusgyration phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_locationvariance phone_locations_doryab_avgspeed phone_locations_doryab_locationentropy phone_locations_doryab_timeattop2location phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_timeattop3location phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop1location phone_locations_doryab_loglocationvariance phone_locations_doryab_varspeed
2 daily#2020-03-07 00:00:00,2020-03-07 23:59:59 daily 2020-03-07 00:00:00 2020-03-07 23:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 0 0 1 6 0 NA 0 6 5.9999 NA 0 NA 0 1 0 5.9999 -11.8004276472878 -12.0499444522888 NA 0
3 daily#2020-03-08 00:00:00,2020-03-08 23:59:59 daily 2020-03-08 00:00:00 2020-03-08 23:59:59 6 5.9999 1 0 6268829.80206745 6268829.80447871 6 5.9999 5227.19980200003 4907.31564827534 NA 0 0.693147180559945 6 5.9999 0.346573590279973 2 0 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 0 3.71826910068082 3.69084399356179 NA 0
4 daily#2020-10-31 00:00:00,2020-10-31 23:59:59 daily 2020-10-31 00:00:00 2020-10-31 23:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 0 0 1 6 0 NA 0 6 5.9999 NA 0 NA 0 1 0 5.9999 -11.8004276472878 -12.0499444522888 NA 0
5 daily#2020-11-01 00:00:00,2020-11-01 23:59:59 daily 2020-11-01 00:00:00 2020-11-01 23:59:59 6 5.9999 1 0 6268829.80206745 6268829.80447871 6 5.9999 5227.19980200003 4907.31564827534 NA 0 0.693147180559945 6 5.9999 0.346573590279973 2 0 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 0 3.71826910068082 3.69084399356179 NA 0
6 morning#2020-03-07 06:00:00,2020-03-07 11:59:59 morning 2020-03-07 06:00:00 2020-03-07 11:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 0 0 1 6 0 NA 0 6 5.9999 NA 0 NA 0 1 0 5.9999 -11.8004276472878 -12.0499444522888 NA 0
7 morning#2020-03-08 06:00:00,2020-03-08 11:59:59 morning 2020-03-08 06:00:00 2020-03-08 11:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 6 0 0 1 0 5.9999 NA 0 6 5.9999 NA 0 NA 0 1 0 -11.8004276472878 -12.0499444522888 NA 0
8 morning#2020-10-31 06:00:00,2020-10-31 11:59:59 morning 2020-10-31 06:00:00 2020-10-31 11:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 0 0 1 6 0 NA 0 6 5.9999 NA 0 NA 0 1 0 5.9999 -11.8004276472878 -12.0499444522888 NA 0
9 morning#2020-11-01 06:00:00,2020-11-01 11:59:59 morning 2020-11-01 06:00:00 2020-11-01 11:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 6 0 0 1 0 5.9999 NA 0 6 5.9999 NA 0 NA 0 1 0 -11.8004276472878 -12.0499444522888 NA 0
10 threeday#2020-03-07 00:00:00,2020-03-09 23:59:59 threeday 2020-03-07 00:00:00 2020-03-09 23:59:59 6 5.9999 2 0 8014514.68387131 8014514.68257681 6 5.9999 12308.2198130989 13346.1355628909 NA 0 1.09861228866811 6 5.9999 0.366204096222703 3 6 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 5.9999 4.09019524373105 4.1253555318677 NA 0
11 threeday#2020-03-08 00:00:00,2020-03-10 23:59:59 threeday 2020-03-08 00:00:00 2020-03-10 23:59:59 6 5.9999 1 0 6268829.80206745 6268829.80447871 6 5.9999 5227.19980200003 4907.31564827534 NA 0 0.693147180559945 6 5.9999 0.346573590279973 2 0 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 0 3.71826910068082 3.69084399356179 NA 0
12 threeday#2020-10-29 00:00:00,2020-10-31 23:59:59 threeday 2020-10-29 00:00:00 2020-10-31 23:59:59 6 5.9999 0 0 0 6 5.9999 1.58333332533827e-12 8.91364939388538e-13 NA 0 0 0 0 1 6 0 NA 0 6 5.9999 NA 0 NA 0 1 0 5.9999 -11.8004276472878 -12.0499444522888 NA 0
13 threeday#2020-10-30 00:00:00,2020-11-01 23:59:59 threeday 2020-10-30 00:00:00 2020-11-01 23:59:59 6 5.9999 2 0 8014514.68387131 8014514.68257681 6 5.9999 12308.2198130989 13346.1355628909 NA 0 1.09861228866811 6 5.9999 0.366204096222703 3 6 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 5.9999 4.09019524373105 4.1253555318677 NA 0
14 threeday#2020-10-31 00:00:00,2020-11-02 23:59:59 threeday 2020-10-31 00:00:00 2020-11-02 23:59:59 6 5.9999 2 0 8014514.68387131 8014514.68257681 6 5.9999 12308.2198130989 13346.1355628909 NA 0 1.09861228866811 6 5.9999 0.366204096222703 3 6 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 5.9999 4.09019524373105 4.1253555318677 NA 0
15 threeday#2020-11-01 00:00:00,2020-11-03 23:59:59 threeday 2020-11-01 00:00:00 2020-11-03 23:59:59 6 5.9999 1 0 6268829.80206745 6268829.80447871 6 5.9999 5227.19980200003 4907.31564827534 NA 0 0.693147180559945 6 5.9999 0.346573590279973 2 0 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 0 3.71826910068082 3.69084399356179 NA 0
16 weekend#2020-10-30 00:00:00,2020-11-01 23:59:59 weekend 2020-10-30 00:00:00 2020-11-01 23:59:59 6 5.9999 2 0 8014514.68387131 8014514.68257681 6 5.9999 12308.2198130989 13346.1355628909 NA 0 1.09861228866811 6 5.9999 0.366204096222703 3 6 5.9999 NA 0 6 5.9999 6 5.9999 NA 0 1 6 5.9999 4.09019524373105 4.1253555318677 NA 0

File diff suppressed because one or more lines are too long

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -244,12 +244,12 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 3 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
@ -265,7 +265,6 @@ PHONE_LOCATIONS:
BARNETT: BARNETT:
COMPUTE: False COMPUTE: False
FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
SRC_SCRIPT: src/features/phone_locations/barnett/main.R SRC_SCRIPT: src/features/phone_locations/barnett/main.R

View File

@ -626,7 +626,7 @@ properties:
PHONE_LOCATIONS: PHONE_LOCATIONS:
type: object type: object
required: [CONTAINER, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, PROVIDERS] required: [CONTAINER, LOCATIONS_TO_USE, FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD, FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION, ACCURACY_LIMIT, PROVIDERS]
properties: properties:
CONTAINER: CONTAINER:
type: string type: string
@ -639,6 +639,9 @@ properties:
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION:
type: integer type: integer
exclusiveMinimum: 0 exclusiveMinimum: 0
ACCURACY_LIMIT:
type: integer
exclusiveMinimum: 0
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
properties: properties:
@ -652,9 +655,6 @@ properties:
items: items:
type: string type: string
enum: [locationvariance,loglocationvariance,totaldistance,avgspeed,varspeed,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,avglengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy,timeathome,homelabel] enum: [locationvariance,loglocationvariance,totaldistance,avgspeed,varspeed,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,avglengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy,timeathome,homelabel]
ACCURACY_LIMIT:
type: integer
exclusiveMinimum: 0
DBSCAN_EPS: DBSCAN_EPS:
type: integer type: integer
exclusiveMinimum: 0 exclusiveMinimum: 0
@ -695,9 +695,6 @@ properties:
items: items:
type: string type: string
enum: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"] enum: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
ACCURACY_LIMIT:
type: integer
exclusiveMinimum: 0
IF_MULTIPLE_TIMEZONES: IF_MULTIPLE_TIMEZONES:
type: string type: string
enum: [USE_MOST_COMMON] enum: [USE_MOST_COMMON]