From c8b7275084a4115590ba79fa8d3619ccfb872201 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Wed, 4 Mar 2020 12:21:36 -0500 Subject: [PATCH] Add firstuseafterTIME feature for screen sensor --- config.yaml | 3 ++- rules/features.snakefile | 1 + src/features/screen_metrics.py | 11 +++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index 48af9b4b..fb3fbc74 100644 --- a/config.yaml +++ b/config.yaml @@ -87,7 +87,8 @@ BATTERY: SCREEN: DAY_SEGMENTS: *day_segments - METRICS_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration"] + REFERENCE_HOUR_FIRST_USE: 0 + METRICS_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] EPISODE_TYPES: ["unlock"] LIGHT: diff --git a/rules/features.snakefile b/rules/features.snakefile index 04da5f5d..30d69fbc 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -101,6 +101,7 @@ rule screen_metrics: phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv" params: day_segment = "{day_segment}", + reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"], metrics_deltas = config["SCREEN"]["METRICS_DELTAS"], episode_types = config["SCREEN"]["EPISODE_TYPES"], bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] diff --git a/src/features/screen_metrics.py b/src/features/screen_metrics.py index b7a1824d..c662f4e9 100644 --- a/src/features/screen_metrics.py +++ b/src/features/screen_metrics.py @@ -5,7 +5,7 @@ import itertools from datetime import datetime, timedelta, time from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes -def getEpisodeDurationFeatures(screen_deltas, episode, metrics, phone_sensed_bins, bin_size): +def getEpisodeDurationFeatures(screen_deltas, episode, metrics, phone_sensed_bins, bin_size, reference_hour_first_use): screen_deltas_episode = screen_deltas[screen_deltas["episode"] == episode] duration_helper = pd.DataFrame() if "countepisode" in metrics: @@ -25,8 +25,8 @@ def getEpisodeDurationFeatures(screen_deltas, episode, metrics, phone_sensed_bin duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).mean()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + "_avgduration" + episode})], axis = 1) if "stdduration" in metrics: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).std()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + "_stdduration" + episode})], axis = 1) - - duration_helper = duration_helper.fillna(0) + if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in metrics: + duration_helper = pd.concat([duration_helper, pd.DataFrame(screen_deltas_episode.groupby(["local_start_date"]).first()[["local_start_date_time"]].local_start_date_time.apply(lambda x: (x.to_pydatetime().hour - reference_hour_first_use) * 3600 + x.to_pydatetime().minute * 60 + x.to_pydatetime().second)).rename(columns = {"local_start_date_time":"screen_" + day_segment + "_firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) + episode})], axis = 1) return duration_helper @@ -35,10 +35,13 @@ phone_sensed_bins = pd.read_csv(snakemake.input["phone_sensed_bins"], parse_date phone_sensed_bins[phone_sensed_bins > 0] = 1 day_segment = snakemake.params["day_segment"] +reference_hour_first_use = snakemake.params["reference_hour_first_use"] metrics_deltas = snakemake.params["metrics_deltas"] episode_types = snakemake.params["episode_types"] bin_size = snakemake.params["bin_size"] +metrics_deltas = ["firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) if feature_name == "firstuseafter" else feature_name for feature_name in metrics_deltas] + metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas, episode_types)] screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_deltas_name]) @@ -52,7 +55,7 @@ if not screen_deltas.empty: if not screen_deltas.empty: screen_features = pd.DataFrame() for episode in episode_types: - screen_features = pd.concat([screen_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas, phone_sensed_bins, bin_size)], axis=1) + screen_features = pd.concat([screen_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas, phone_sensed_bins, bin_size, reference_hour_first_use)], axis=1) screen_features = screen_features.rename_axis("local_date").reset_index()