diff --git a/config.yaml b/config.yaml index ecd9ec3d..c69e14e0 100644 --- a/config.yaml +++ b/config.yaml @@ -89,7 +89,7 @@ BATTERY: SCREEN: DAY_SEGMENTS: *day_segments REFERENCE_HOUR_FIRST_USE: 0 - METRICS_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] + FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] EPISODE_TYPES: ["unlock"] LIGHT: diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst index e47d9211..05f5497b 100644 --- a/docs/features/extracted.rst +++ b/docs/features/extracted.rst @@ -885,7 +885,7 @@ See `Screen Config Code`_ - Apply readable dateime to Screen dataset: ``expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=config["SENSORS"]),`` - Extract the deltas from the Screen dataset: expand("data/processed/{pid}/screen_deltas.csv", pid=config["PIDS"]), -- Extract Screen Metrics: +- Extract Screen Features: | ``expand("data/processed/{pid}/screen_{day_segment}.csv",`` | ``pid=config["PIDS"],`` @@ -905,9 +905,9 @@ See `Screen Config Code`_ - **Script:** ``src/features/screen_deltas.R`` - See the screen_deltas.R_ script. -- **Rule:** ``rules/features.snakefile/screen_metrics`` - See the screen_metrics_ rule. +- **Rule:** ``rules/features.snakefile/screen_features`` - See the screen_features_ rule. - - **Script:** ``src/features/screen_metrics.py`` - See the screen_metrics.py_ script. + - **Script:** ``src/features/screen_features.py`` - See the screen_features.py_ script. .. _screen-parameters: @@ -917,16 +917,16 @@ See `Screen Config Code`_ Name Description =============== =================== day_segment The particular ``day_segments`` that will be analyzed. 
The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night`` -metrics_events The different measures that can be retrieved from the events in the Screen dataset. See :ref:`Available Screen Events Metrics <screen-events-available-metrics>` Table below -metrics_deltas The different measures that can be retrieved from the episodes extracted from the Screen dataset. See :ref:`Available Screen Episodes Metrics <screen-episodes-available-metrics>` Table below +features_events The different measures that can be retrieved from the events in the Screen dataset. See :ref:`Available Screen Events Features <screen-events-available-features>` Table below +features_deltas The different measures that can be retrieved from the episodes extracted from the Screen dataset. See :ref:`Available Screen Episodes Features <screen-episodes-available-features>` Table below episodes The action that defines an episode =============== =================== -.. _screen-events-available-metrics: +.. _screen-events-available-features: .. - **Available Screen Events Metrics** - The following table shows a list of the available metrics for Screen Events. + **Available Screen Events Features** + The following table shows a list of the available features for Screen Events. ================= ============== ============= Name Units Description ================= ============== ============= @@ -935,11 +935,11 @@ episodes The action that defines an episode unlocksperminute Unlock events Unlock events per minute: The average of the number of unlock events that occur in a minute ================= ============== ============= -.. _screen-episodes-available-metrics: +.. _screen-episodes-available-features: -**Available Screen Episodes Metrics** +**Available Screen Episodes Features** -The following table shows a list of the available metrics for Screen Episodes. +The following table shows a list of the available features for Screen Episodes. ============= ========= ============= Name Units Description @@ -1187,8 +1187,8 @@ stddurationactivebout minutes Std duration active bout: The standard ..
_`Screen Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L88 .. _screen_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L33 .. _screen_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/screen_deltas.R -.. _screen_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L97 -.. _screen_metrics.py: https://github.com/carissalow/rapids/blob/master/src/features/screen_metrics.py +.. _screen_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L97 +.. _screen_features.py: https://github.com/carissalow/rapids/blob/master/src/features/screen_features.py .. _`Fitbit: Heart Rate Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L113 .. _fitbit_with_datetime: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L94 .. 
_fitbit_readable_datetime.py: https://github.com/carissalow/rapids/blob/master/src/data/fitbit_readable_datetime.py diff --git a/rules/features.snakefile b/rules/features.snakefile index 37a8cb40..add363ce 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -96,20 +96,20 @@ rule battery_metrics: script: "../src/features/battery_metrics.py" -rule screen_metrics: +rule screen_features: input: screen_deltas = "data/processed/{pid}/screen_deltas.csv", phone_sensed_bins = "data/interim/{pid}/phone_sensed_bins.csv" params: day_segment = "{day_segment}", reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"], - metrics_deltas = config["SCREEN"]["METRICS_DELTAS"], + features_deltas = config["SCREEN"]["FEATURES_DELTAS"], episode_types = config["SCREEN"]["EPISODE_TYPES"], bin_size = config["PHONE_VALID_SENSED_DAYS"]["BIN_SIZE"] output: "data/processed/{pid}/screen_{day_segment}.csv" script: - "../src/features/screen_metrics.py" + "../src/features/screen_features.py" rule light_metrics: input: diff --git a/src/features/screen_metrics.py b/src/features/screen_features.py similarity index 80% rename from src/features/screen_metrics.py rename to src/features/screen_features.py index 09d8c333..f84f2d0d 100644 --- a/src/features/screen_metrics.py +++ b/src/features/screen_features.py @@ -5,27 +5,27 @@ import itertools from datetime import datetime, timedelta, time from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes -def getEpisodeDurationFeatures(screen_deltas, episode, metrics, phone_sensed_bins, bin_size, reference_hour_first_use): +def getEpisodeDurationFeatures(screen_deltas, episode, features, phone_sensed_bins, bin_size, reference_hour_first_use): screen_deltas_episode = screen_deltas[screen_deltas["episode"] == episode] duration_helper = pd.DataFrame() - if "countepisode" in metrics: + if "countepisode" in features: duration_helper = pd.concat([duration_helper, 
screen_deltas_episode.groupby(["local_start_date"]).count()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_countepisode" + episode})], axis = 1) - if "episodepersensedminutes" in metrics: + if "episodepersensedminutes" in features: for date, row in screen_deltas_episode.groupby(["local_start_date"]).count()[["time_diff"]].iterrows(): sensed_minutes = phone_sensed_bins.loc[date, :].sum() * bin_size episode_per_sensedminutes = row["time_diff"] / (1 if sensed_minutes == 0 else sensed_minutes) duration_helper.loc[date, "screen_" + day_segment + "_episodepersensedminutes" + episode] = episode_per_sensedminutes - if "sumduration" in metrics: + if "sumduration" in features: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).sum()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_sumduration" + episode})], axis = 1) - if "maxduration" in metrics: + if "maxduration" in features: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).max()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_maxduration" + episode})], axis = 1) - if "minduration" in metrics: + if "minduration" in features: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).min()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_minduration" + episode})], axis = 1) - if "avgduration" in metrics: + if "avgduration" in features: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).mean()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + "_avgduration" + episode})], axis = 1) - if "stdduration" in metrics: + if "stdduration" in features: duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).std()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + 
"_stdduration" + episode})], axis = 1) - if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in metrics: + if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in features: duration_helper = pd.concat([duration_helper, pd.DataFrame(screen_deltas_episode.groupby(["local_start_date"]).first()[["local_start_date_time"]].local_start_date_time.apply(lambda x: (x.to_pydatetime().hour - reference_hour_first_use) * 3600 + x.to_pydatetime().minute * 60 + x.to_pydatetime().second)).rename(columns = {"local_start_date_time":"screen_" + day_segment + "_firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) + episode})], axis = 1) return duration_helper @@ -36,15 +36,15 @@ phone_sensed_bins[phone_sensed_bins > 0] = 1 day_segment = snakemake.params["day_segment"] reference_hour_first_use = snakemake.params["reference_hour_first_use"] -metrics_deltas = snakemake.params["metrics_deltas"] +features_deltas = snakemake.params["features_deltas"] episode_types = snakemake.params["episode_types"] bin_size = snakemake.params["bin_size"] -metrics_deltas = ["firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) if feature_name == "firstuseafter" else feature_name for feature_name in metrics_deltas] +features_deltas = ["firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) if feature_name == "firstuseafter" else feature_name for feature_name in features_deltas] -metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas, episode_types)] +features_deltas_name = ["".join(feature) for feature in itertools.product(features_deltas, episode_types)] -screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_deltas_name]) +screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in features_deltas_name]) if not screen_deltas.empty: # preprocess day_segment and episodes screen_deltas = splitOvernightEpisodes(screen_deltas, [], ["episode"]) @@ 
-55,7 +55,7 @@ if not screen_deltas.empty: if not screen_deltas.empty: screen_features = pd.DataFrame() for episode in episode_types: - screen_features = pd.concat([screen_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas, phone_sensed_bins, bin_size, reference_hour_first_use)], axis=1) + screen_features = pd.concat([screen_features, getEpisodeDurationFeatures(screen_deltas, episode, features_deltas, phone_sensed_bins, bin_size, reference_hour_first_use)], axis=1) if not screen_features.empty: screen_features = screen_features.rename_axis("local_date").reset_index()