diff --git a/Snakefile b/Snakefile
index 4e78f750..e3617e42 100644
--- a/Snakefile
+++ b/Snakefile
@@ -26,6 +26,9 @@ rule all:
         expand("data/processed/{pid}/battery_{day_segment}.csv",
             pid = config["PIDS"],
             day_segment = config["BATTERY"]["DAY_SEGMENTS"]),
+        expand("data/processed/{pid}/screen_{day_segment}.csv",
+            pid = config["PIDS"],
+            day_segment = config["SCREEN"]["DAY_SEGMENTS"]),
         # Reports
         expand("reports/figures/{pid}/{sensor}_heatmap_rows.html", pid=config["PIDS"], sensor=config["SENSORS"]),
         expand("reports/figures/{pid}/compliance_heatmap.html", pid=config["PIDS"], sensor=config["SENSORS"]),
diff --git a/config.yaml b/config.yaml
index 4aff4877..ca9faf25 100644
--- a/config.yaml
+++ b/config.yaml
@@ -61,4 +61,9 @@ GOOGLE_ACTIVITY_RECOGNITION:
 BATTERY:
   DAY_SEGMENTS: *day_segments
   METRICS: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
- 
\ No newline at end of file
+
+SCREEN:
+  DAY_SEGMENTS: *day_segments
+  METRICS_EVENT: ["counton", "countunlock"]
+  METRICS_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
+  EPISODES: ["unlock"]
\ No newline at end of file
diff --git a/rules/features.snakefile b/rules/features.snakefile
index 99be51db..fd59e35c 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -73,3 +73,18 @@ rule battery_metrics:
         "data/processed/{pid}/battery_{day_segment}.csv"
     script:
         "../src/features/battery_metrics.py"
+
+rule screen_metrics:
+    input:
+        screen_events = "data/raw/{pid}/screen_with_datetime.csv",
+        screen_deltas = "data/processed/{pid}/screen_deltas.csv"
+    params:
+        day_segment = "{day_segment}",
+        metrics_event = config["SCREEN"]["METRICS_EVENT"],
+        metrics_episode = config["SCREEN"]["METRICS_EPISODE"],
+        episodes = config["SCREEN"]["EPISODES"]
+    output:
+        "data/processed/{pid}/screen_{day_segment}.csv"
+    script:
+        "../src/features/screen_metrics.py"
+
diff --git a/src/features/screen_metrics.py 
# Reconstructed from the diff hunk for b/src/features/screen_metrics.py
# (new file mode 100644, index 00000000..e5d75bd9, @@ -0,0 +1,83 @@).
"""Compute per-date screen features for one participant and day segment.

Executed by Snakemake (rule ``screen_metrics``), which injects the global
``snakemake`` object. Inputs are the raw screen events CSV and the screen
episode ("deltas") CSV; the output is one CSV with a ``local_date`` column
followed by the requested feature columns.
"""
import datetime
import itertools
# NOTE: this rebinds the name ``datetime`` to the class; kept as in the
# original import list.
from datetime import datetime, timedelta, time

import numpy as np
import pandas as pd

# (metric name, pandas aggregation) pairs, in the column order of the output.
_DURATION_AGGREGATIONS = (
    ("sumduration", "sum"),
    ("maxduration", "max"),
    ("minduration", "min"),
    ("avgduration", "mean"),
    ("stdduration", "std"),
)

# (screen_status value, helper column) pairs.
# NOTE(review): the 0 -> "on" / 1 -> "off" labelling is kept from the
# original code; the AWARE screen sensor is believed to use 0=off, 1=on.
# Confirm against the sensor documentation. Only the sign of the
# "diffcountonoff" column depends on it.
_STATUS_COUNT_COLUMNS = (
    (0, "count_on"),
    (1, "count_off"),
    (2, "count_lock"),
    (3, "count_unlock"),
)


def _resolve_day_segment(day_segment):
    """Return ``day_segment``, or the legacy module-level one when None."""
    if day_segment is not None:
        return day_segment
    try:
        # Backward compatibility: the functions used to read a module global.
        return globals()["day_segment"]
    except KeyError:
        raise NameError("name 'day_segment' is not defined") from None


def _empty_screen_features(day_segment, metrics_event, metrics_episode, episodes):
    """Return a zero-row frame with ``local_date`` and the feature columns."""
    episode_metric_names = [
        "".join(pair) for pair in itertools.product(metrics_episode, episodes)
    ]
    feature_names = list(metrics_event) + episode_metric_names
    # NOTE(review): unlike the non-empty path, this schema has no
    # "diffcountonoff" / "diffcountunlocklock" columns; kept as-is.
    columns = ["local_date"] + [
        "screen_" + day_segment + "_" + name for name in feature_names
    ]
    return pd.DataFrame(columns=columns)


def getEpisodeDurationFeatures(screen_deltas, episode, metrics, day_segment=None):
    """Aggregate ``time_diff`` per ``local_start_date`` for one episode type.

    Args:
        screen_deltas: DataFrame with an ``episode`` column, a ``time_diff``
            column and ``local_start_date`` as a column or index level.
        episode: value of the ``episode`` column to keep.
        metrics: container of requested metric names; recognised names are
            sumduration, maxduration, minduration, avgduration, stdduration.
        day_segment: segment label used in the column names. When None the
            module-level ``day_segment`` is used (legacy behaviour).

    Returns:
        DataFrame indexed by ``local_start_date`` with one column per
        requested metric, named
        ``screen_<day_segment>_<metric><episode>``; NaNs (e.g. the std of a
        single episode) are replaced by 0. Empty frame if no metric matches.
    """
    prefix = "screen_" + _resolve_day_segment(day_segment) + "_"
    requested = [(name, func) for name, func in _DURATION_AGGREGATIONS if name in metrics]
    if not requested:
        return pd.DataFrame()

    episode_rows = screen_deltas[screen_deltas["episode"] == episode]
    # Select time_diff *before* aggregating so non-numeric columns are never
    # fed to mean()/std() (that raises TypeError on pandas >= 2.0).
    grouped = episode_rows.groupby("local_start_date")["time_diff"]
    columns = [
        grouped.agg(func).rename(prefix + name + episode) for name, func in requested
    ]
    return pd.concat(columns, axis=1).fillna(0)


def getEventFeatures(screen_data, metrics_event, day_segment=None):
    """Count screen status events per ``local_date``.

    Args:
        screen_data: DataFrame with ``local_date``, ``screen_status`` and
            ``timestamp`` columns.
        metrics_event: container of requested metric names; recognised
            names are counton and countunlock.
        day_segment: segment label used in the column names. When None the
            module-level ``day_segment`` is used (legacy behaviour).

    Returns:
        DataFrame indexed by ``local_date``. ``counton`` is the larger of
        the status-0 and status-1 counts, ``countunlock`` the larger of the
        status-2 and status-3 counts. The two ``diffcount*`` columns are
        always included; they expose unmatched events for checking.
    """
    prefix = "screen_" + _resolve_day_segment(day_segment) + "_"

    counts = (
        screen_data.groupby(["local_date", "screen_status"])["timestamp"]
        .count()
        .rename("n")
        .reset_index()
    )
    per_status = [
        counts.loc[counts["screen_status"] == status]
        .set_index("local_date")["n"]
        .rename(column)
        for status, column in _STATUS_COUNT_COLUMNS
    ]
    # Dates on which a status never occurs get a count of 0.
    count_helper = pd.concat(per_status, axis=1).fillna(0).astype(np.int64)

    event_features = pd.DataFrame(index=count_helper.index)
    if "counton" in metrics_event:
        event_features[prefix + "counton"] = count_helper[["count_on", "count_off"]].max(axis=1)
    if "countunlock" in metrics_event:
        event_features[prefix + "countunlock"] = count_helper[["count_lock", "count_unlock"]].max(axis=1)

    # Diagnostic columns: a non-zero value means unmatched events that day.
    event_features[prefix + "diffcountonoff"] = count_helper["count_on"] - count_helper["count_off"]
    event_features[prefix + "diffcountunlocklock"] = count_helper["count_unlock"] - count_helper["count_lock"]
    return event_features


def main():
    """Read the Snakemake inputs, build the features and write the CSV."""
    # Project-local helpers; imported here so the feature functions above
    # can be imported without the project on sys.path.
    from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes

    screen_data = pd.read_csv(
        snakemake.input["screen_events"],
        parse_dates=["local_date_time", "local_date"],
    )
    screen_deltas = pd.read_csv(
        snakemake.input["screen_deltas"],
        parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"],
    )
    day_segment = snakemake.params["day_segment"]
    metrics_event = snakemake.params["metrics_event"]
    metrics_episode = snakemake.params["metrics_episode"]
    episodes = snakemake.params["episodes"]

    if screen_data.empty:
        screen_features = _empty_screen_features(day_segment, metrics_event, metrics_episode, episodes)
    else:
        # Drop consecutive duplicates of screen_status, keeping the last one.
        is_last_of_run = screen_data["screen_status"].shift(-1) != screen_data["screen_status"]
        screen_data = screen_data.loc[is_last_of_run].reset_index(drop=True)

        # Preprocess day_segment and episodes.
        screen_deltas = splitOvernightEpisodes(screen_deltas, [], ["episode"])
        if day_segment != "daily":
            screen_data = screen_data[screen_data["local_day_segment"] == day_segment]
            screen_deltas = splitMultiSegmentEpisodes(screen_deltas, day_segment, [])
        screen_deltas = screen_deltas.set_index("local_start_date")

        # Extract features for events and episodes.
        event_features = getEventFeatures(screen_data, metrics_event, day_segment)
        duration_features = [
            getEpisodeDurationFeatures(screen_deltas, episode, metrics_episode, day_segment)
            for episode in episodes
        ]
        screen_features = pd.concat([event_features] + duration_features, axis=1).fillna(0)
        # The two inputs name their index differently (local_date vs
        # local_start_date), so concat drops the name; restore it so the
        # date column matches the empty-data schema.
        screen_features = screen_features.rename_axis("local_date").reset_index()

    screen_features.to_csv(snakemake.output[0], index=False)


if __name__ == "__main__":
    main()