Update segment labels and fix the bug when we do not have any labels for event segments
parent
cefcb0635b
commit
1c57320ab3
|
@ -27,8 +27,7 @@ barnett_daily_features <- function(snakemake){
|
||||||
filter(accuracy < accuracy_limit) %>%
|
filter(accuracy < accuracy_limit) %>%
|
||||||
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
mutate(is_daily = str_detect(assigned_segments, paste0(".*#", datetime_start_regex, ",", datetime_end_regex, ".*")))
|
||||||
|
|
||||||
|
if(nrow(segment_labels) == 0 || nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
|
||||||
if(nrow(location) == 0 || all(location$is_daily == FALSE) || (max(location$timestamp) - min(location$timestamp) < 86400000)){
|
|
||||||
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
warning("Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:",
|
||||||
"\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
|
"\nLocation data rows within accuracy: ", nrow(location %>% filter(accuracy < accuracy_limit)),
|
||||||
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
"\nLocation data rows within a daily time segment: ", nrow(filter(location, is_daily)),
|
||||||
|
|
|
@ -60,6 +60,9 @@ fetch_provider_features <- function(provider, provider_key, sensor_key, sensor_d
|
||||||
source(provider[["SRC_SCRIPT"]])
|
source(provider[["SRC_SCRIPT"]])
|
||||||
features_function <- match.fun(paste0(tolower(provider_key), "_features"))
|
features_function <- match.fun(paste0(tolower(provider_key), "_features"))
|
||||||
time_segments <- time_segments_labels %>% pull(label)
|
time_segments <- time_segments_labels %>% pull(label)
|
||||||
|
if(length(time_segments) == 0){
|
||||||
|
time_segments <- c("")
|
||||||
|
}
|
||||||
for (time_segment in time_segments){
|
for (time_segment in time_segments){
|
||||||
print(paste(rapids_log_tag,"Processing", sensor_key, provider_key, time_segment))
|
print(paste(rapids_log_tag,"Processing", sensor_key, provider_key, time_segment))
|
||||||
|
|
||||||
|
|
|
@ -102,6 +102,8 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
|
||||||
feature_module = import_path(provider["SRC_SCRIPT"])
|
feature_module = import_path(provider["SRC_SCRIPT"])
|
||||||
feature_function = getattr(feature_module, provider_key.lower() + "_features")
|
feature_function = getattr(feature_module, provider_key.lower() + "_features")
|
||||||
|
|
||||||
|
if time_segments_labels["label"].empty:
|
||||||
|
time_segments_labels["label"] = [""]
|
||||||
for time_segment in time_segments_labels["label"]:
|
for time_segment in time_segments_labels["label"]:
|
||||||
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
|
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, time_segment))
|
||||||
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
|
features = feature_function(sensor_data_files, time_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
|
||||||
|
|
|
@ -26,7 +26,7 @@ features = pd.read_csv(snakemake.input["all_sensor_features"])
|
||||||
|
|
||||||
|
|
||||||
if time_segments_type == "FREQUENCY":
|
if time_segments_type == "FREQUENCY":
|
||||||
features["local_segment_label"] = features["local_segment_label"].str.replace(r"[0-9]{4}", "")
|
features["local_segment_label"] = features["local_segment_label"].str[:-4]
|
||||||
if time_segments_type == "EVENT":
|
if time_segments_type == "EVENT":
|
||||||
features["local_segment_label"] = "event"
|
features["local_segment_label"] = "event"
|
||||||
|
|
||||||
|
|
|
@ -58,7 +58,7 @@ time_segments = pd.read_csv(snakemake.input["time_segments_file"])["label"].uniq
|
||||||
|
|
||||||
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_segment_start_datetime", "local_segment_end_datetime"]).sort_values(by=["pid", "local_segment_start_datetime"])
|
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_segment_start_datetime", "local_segment_end_datetime"]).sort_values(by=["pid", "local_segment_start_datetime"])
|
||||||
if time_segments_type == "FREQUENCY":
|
if time_segments_type == "FREQUENCY":
|
||||||
phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str.replace(r"[0-9]{4}", "")
|
phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str[:-4]
|
||||||
|
|
||||||
html_file = open(snakemake.output[0], "w", encoding="utf-8")
|
html_file = open(snakemake.output[0], "w", encoding="utf-8")
|
||||||
if phone_data_yield.empty:
|
if phone_data_yield.empty:
|
||||||
|
|
|
@ -11,6 +11,25 @@ mod = util.module_from_spec(spec)
|
||||||
spec.loader.exec_module(mod)
|
spec.loader.exec_module(mod)
|
||||||
filter_data_by_segment = getattr(mod, "filter_data_by_segment")
|
filter_data_by_segment = getattr(mod, "filter_data_by_segment")
|
||||||
|
|
||||||
|
def getRowCount(sensor_paths, sensor_names, time_segments_labels):
    """Count the rows of every sensor file per local segment.

    Args:
        sensor_paths: Paths of the sensor CSV files. Only the
            "assigned_segments" column of each file is read.
        sensor_names: Names used as the count column of each sensor, paired
            positionally with ``sensor_paths``.
        time_segments_labels: Iterable of time segment labels; each one is
            passed to ``filter_data_by_segment`` together with the sensor data.

    Returns:
        A DataFrame whose index is named "local_segment" and that holds one
        count column per sensor that had at least one row in some segment.
        The frame is empty when no sensor contributed any row.
    """
    sensors_row_count = pd.DataFrame()
    for sensor_path, sensor_name in zip(sensor_paths, sensor_names):
        sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])

        sensor_row_count = pd.DataFrame()
        if not sensor_data.empty:
            for time_segment in time_segments_labels:
                # NOTE(review): filter_data_by_segment is presumably what adds the
                # "local_segment" column grouped on below -- confirm against its module.
                sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)

                if not sensor_data_per_segment.empty:
                    # Rows per local segment, stored under the sensor's name.
                    segment_counts = (
                        sensor_data_per_segment
                        .groupby(["local_segment"])[["local_segment"]]
                        .count()
                        .rename(columns={"local_segment": sensor_name})
                    )
                    sensor_row_count = pd.concat([sensor_row_count, segment_counts], axis=0, sort=False)
        # Sensors are placed side by side, aligned on the local segment index.
        sensors_row_count = pd.concat([sensors_row_count, sensor_row_count], axis=1, sort=False)

    sensors_row_count.index.name = "local_segment"
    # The .str accessor is only valid on a string index; an empty result may
    # carry a non-string (range) index, so skip the clean-up in that case.
    if len(sensors_row_count.index) > 0:
        # regex=True is explicit because newer pandas treats the pattern as a
        # literal string by default, which would leave the "_RR<n>SS" suffix in place.
        sensors_row_count.index = sensors_row_count.index.str.replace(r"_RR\d+SS#", "#", regex=True)

    return sensors_row_count
|
||||||
|
|
||||||
def getRowCountHeatmap(data_for_plot, pid, time_segment, html_file):
|
def getRowCountHeatmap(data_for_plot, pid, time_segment, html_file):
|
||||||
|
|
||||||
fig = px.timeline(data_for_plot,
|
fig = px.timeline(data_for_plot,
|
||||||
|
@ -18,7 +37,7 @@ def getRowCountHeatmap(data_for_plot, pid, time_segment, html_file):
|
||||||
x_end="local_segment_end_datetime",
|
x_end="local_segment_end_datetime",
|
||||||
y="sensor",
|
y="sensor",
|
||||||
color="scaled_value",
|
color="scaled_value",
|
||||||
color_continuous_scale="Peach", #"Viridis",
|
color_continuous_scale="Peach",
|
||||||
opacity=0.7,
|
opacity=0.7,
|
||||||
hover_data={"local_segment_start_datetime":False, "local_segment_end_datetime":False, "local_segment":True, "value":True, "scaled_value":True})
|
hover_data={"local_segment_start_datetime":False, "local_segment_end_datetime":False, "local_segment":True, "value":True, "scaled_value":True})
|
||||||
|
|
||||||
|
@ -48,22 +67,7 @@ phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], index_col=["
|
||||||
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
|
if ("phone_data_yield_rapids_ratiovalidyieldedminutes" not in phone_data_yield.columns) or ("phone_data_yield_rapids_ratiovalidyieldedhours" not in phone_data_yield.columns):
|
||||||
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
|
raise ValueError("Please make sure [PHONE_DATA_YIELD][RAPIDS][COMPUTE] is True AND [PHONE_DATA_YIELD][RAPIDS][FEATURES] contains [ratiovalidyieldedminutes, ratiovalidyieldedhours].")
|
||||||
|
|
||||||
# extract row count
|
sensors_row_count = getRowCount(snakemake.input["all_sensors"], sensor_names, time_segments_labels)
|
||||||
sensors_row_count = pd.DataFrame()
|
|
||||||
for sensor_path, sensor_name in zip(snakemake.input["all_sensors"], sensor_names):
|
|
||||||
sensor_data = pd.read_csv(sensor_path, usecols=["assigned_segments"])
|
|
||||||
|
|
||||||
sensor_row_count = pd.DataFrame()
|
|
||||||
if not sensor_data.empty:
|
|
||||||
for time_segment in time_segments_labels:
|
|
||||||
sensor_data_per_segment = filter_data_by_segment(sensor_data, time_segment)
|
|
||||||
|
|
||||||
if not sensor_data_per_segment.empty:
|
|
||||||
sensor_row_count = pd.concat([sensor_row_count, sensor_data_per_segment.groupby(["local_segment"])[["local_segment"]].count().rename(columns={"local_segment": sensor_name})], axis=0, sort=False)
|
|
||||||
sensors_row_count = pd.concat([sensors_row_count, sensor_row_count], axis=1, sort=False)
|
|
||||||
|
|
||||||
sensors_row_count.index.name = "local_segment"
|
|
||||||
sensors_row_count.index = sensors_row_count.index.str.replace(r"_RR\d+SS", "")
|
|
||||||
data_for_plot = phone_data_yield.rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).merge(sensors_row_count, how="left", left_index=True, right_index=True).reset_index()
|
data_for_plot = phone_data_yield.rename(columns={"phone_data_yield_rapids_ratiovalidyieldedminutes": "ratiovalidyieldedminutes","phone_data_yield_rapids_ratiovalidyieldedhours": "ratiovalidyieldedhours"}).merge(sensors_row_count, how="left", left_index=True, right_index=True).reset_index()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -79,10 +79,10 @@ label = participant_file["PHONE"]["LABEL"]
|
||||||
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])
|
phone_data_yield = pd.read_csv(snakemake.input["phone_data_yield"], parse_dates=["local_date_time"])
|
||||||
if time_segments_type == "FREQUENCY":
|
if time_segments_type == "FREQUENCY":
|
||||||
phone_data_yield["assigned_segments"] = phone_data_yield["assigned_segments"].str.replace(r"[0-9]{4}#", "#")
|
phone_data_yield["assigned_segments"] = phone_data_yield["assigned_segments"].str.replace(r"[0-9]{4}#", "#")
|
||||||
time_segments_labels["label"] = time_segments_labels["label"].str.replace(r"[0-9]{4}", "")
|
time_segments_labels["label"] = time_segments_labels["label"].str[:-4]
|
||||||
if time_segments_type == "PERIODIC":
|
if time_segments_type == "PERIODIC":
|
||||||
phone_data_yield["assigned_segments"] = phone_data_yield["assigned_segments"].str.replace(r"_RR\d+SS#", "#")
|
phone_data_yield["assigned_segments"] = phone_data_yield["assigned_segments"].str.replace(r"_RR\d+SS#", "#")
|
||||||
time_segments_labels["label"] = time_segments_labels["label"].str.replace(r"_RR\d+SS", "")
|
time_segments_labels["label"] = time_segments_labels["label"].str.replace(r"_RR\d+SS$", "")
|
||||||
|
|
||||||
html_file = open(snakemake.output[0], "a", encoding="utf-8")
|
html_file = open(snakemake.output[0], "a", encoding="utf-8")
|
||||||
if phone_data_yield.empty:
|
if phone_data_yield.empty:
|
||||||
|
|
|
@ -6,7 +6,7 @@ time_segments_type = snakemake.params["time_segments_type"]
|
||||||
phone_data_yield = pd.read_csv(snakemake.input[0])
|
phone_data_yield = pd.read_csv(snakemake.input[0])
|
||||||
|
|
||||||
if time_segments_type == "FREQUENCY":
|
if time_segments_type == "FREQUENCY":
|
||||||
phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str.replace(r"[0-9]{4}", "")
|
phone_data_yield["local_segment_label"] = phone_data_yield["local_segment_label"].str[:-4]
|
||||||
if time_segments_type == "EVENT":
|
if time_segments_type == "EVENT":
|
||||||
phone_data_yield["local_segment_label"] = "event"
|
phone_data_yield["local_segment_label"] = "event"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue