From f46e8c066671abf60a898f0ccebd52352159c4f2 Mon Sep 17 00:00:00 2001 From: Mingze Cao <29229557+Martinze@users.noreply.github.com> Date: Wed, 8 Apr 2020 12:31:43 -0500 Subject: [PATCH] Refactor applications_foreground features: replace "metrics" with "features" Co-authored-by: Meng Li --- config.yaml | 2 +- docs/features/extracted.rst | 18 +++++++-------- rules/features.snakefile | 6 ++--- ...py => applications_foreground_features.py} | 22 +++++++++---------- 4 files changed, 24 insertions(+), 24 deletions(-) rename src/features/{applications_foreground_metrics.py => applications_foreground_features.py} (81%) diff --git a/config.yaml b/config.yaml index ba3a4e07..b0321f2c 100644 --- a/config.yaml +++ b/config.yaml @@ -109,7 +109,7 @@ APPLICATIONS_FOREGROUND: SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps EXCLUDED_CATEGORIES: ["system_apps", "video"] EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"] - METRICS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] + FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"] HEARTRATE: DAY_SEGMENTS: *day_segments diff --git a/docs/features/extracted.rst b/docs/features/extracted.rst index 99ba1a62..3cc8260d 100644 --- a/docs/features/extracted.rst +++ b/docs/features/extracted.rst @@ -451,7 +451,7 @@ See `Applications Foreground Config Code`_ .. 
- Genre categorization of Applications Foreground dataset: ``expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", pid=config["PIDS"]),`` -- Extract Applications Foreground Metrics: +- Extract Applications Foreground Features: | ``expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",`` | ``pid=config["PIDS"],`` @@ -471,9 +471,9 @@ See `Applications Foreground Config Code`_ - **Script:** ``../src/data/application_genres.R`` - See the application_genres.R_ script -- **Rule:** ``rules/features.snakefile/applications_foreground_metrics`` - See the applications_foreground_metrics_ rule. +- **Rule:** ``rules/features.snakefile/applications_foreground_features`` - See the applications_foreground_features_ rule. - - **Script:** ``src/features/applications_foreground_metrics.py`` - See the applications_foreground_metrics.py_ script. + - **Script:** ``src/features/applications_foreground_features.py`` - See the applications_foreground_features.py_ script. .. _applications-foreground-parameters: @@ -488,14 +488,14 @@ multiple_categories Categories of apps that will be included for the data c single_apps Any Android app can be included in the list of apps used to collect data by adding the package name to this list. (E.g. Youtube) excluded_categories Categories of apps that will be excluded for the data collection. The available categories can be defined in the ``APPLICATION_GENRES`` in the ``config`` file. See :ref:`Assumtions and Observations `. excluded_apps Any Android app can be excluded from the list of apps used to collect data by adding the package name to this list. -metrics The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Metrics ` Table below +features The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Features ` Table below ==================== =================== -.. 
_applications-foreground-available-metrics: +.. _applications-foreground-available-features: -**Available Applications Foreground Metrics** +**Available Applications Foreground Features** -The following table shows a list of the available metrics for the Applications Foreground dataset +The following table shows a list of the available features for the Applications Foreground dataset ================== ========= ============= Name Units Description @@ -1162,8 +1162,8 @@ stddurationactivebout minutes Std duration active bout: The standard .. _`Application Genres Config`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L54 .. _application_genres: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L81 .. _application_genres.R: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/src/data/application_genres.R -.. _applications_foreground_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135 -.. _applications_foreground_metrics.py: https://github.com/carissalow/rapids/blob/master/src/features/accelerometer_metrics.py +.. _applications_foreground_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135 +.. _applications_foreground_features.py: https://github.com/carissalow/rapids/blob/master/src/features/applications_foreground_features.py .. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L84 .. _battery_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L25 .. 
_battery_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/battery_deltas.R diff --git a/rules/features.snakefile b/rules/features.snakefile index 07b79f9a..3083b6b1 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -133,7 +133,7 @@ rule accelerometer_features: script: "../src/features/accelerometer_features.py" -rule applications_foreground_metrics: +rule applications_foreground_features: input: "data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", params: @@ -143,11 +143,11 @@ rule applications_foreground_metrics: single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"], excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"], excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"], - metrics = config["APPLICATIONS_FOREGROUND"]["METRICS"], + features = config["APPLICATIONS_FOREGROUND"]["FEATURES"], output: "data/processed/{pid}/applications_foreground_{day_segment}.csv" script: - "../src/features/applications_foreground_metrics.py" + "../src/features/applications_foreground_features.py" rule fitbit_heartrate_metrics: input: diff --git a/src/features/applications_foreground_metrics.py b/src/features/applications_foreground_features.py similarity index 81% rename from src/features/applications_foreground_metrics.py rename to src/features/applications_foreground_features.py index e41fac35..3bba2e70 100644 --- a/src/features/applications_foreground_metrics.py +++ b/src/features/applications_foreground_features.py @@ -4,17 +4,17 @@ import itertools from scipy.stats import entropy -def compute_metrics(filtered_data, apps_type, metrics, apps_features): - if "timeoffirstuse" in metrics: +def compute_features(filtered_data, apps_type, requested_features, apps_features): + if "timeoffirstuse" in requested_features: time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date") 
apps_features["apps_" + day_segment + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"] - if "timeoflastuse" in metrics: + if "timeoflastuse" in requested_features: time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date") apps_features["apps_" + day_segment + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"] - if "frequencyentropy" in metrics: + if "frequencyentropy" in requested_features: apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index() apps_features["apps_" + day_segment + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_date")["timestamp"].agg(entropy) - if "count" in metrics: + if "count" in requested_features: apps_features["apps_" + day_segment + "_count" + apps_type] = filtered_data.groupby(["local_date"]).count()["timestamp"] apps_features.fillna(value={"apps_" + day_segment + "_count" + apps_type: 0}, inplace=True) return apps_features @@ -27,7 +27,7 @@ multiple_categories_with_genres = snakemake.params["multiple_categories"] single_apps = snakemake.params["single_apps"] excluded_categories = snakemake.params["excluded_categories"] excluded_apps = snakemake.params["excluded_apps"] -metrics = snakemake.params["metrics"] +features = snakemake.params["features"] single_categories = list(set(single_categories) - set(excluded_categories)) multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories)) @@ -43,7 +43,7 @@ apps_data = apps_data[~apps_data["application_name"].isin(excluded_apps)] # deep copy the apps_data for the top1global computation apps_data_global = apps_data.copy() -apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(metric) for metric in 
itertools.product(metrics, single_categories + multiple_categories + apps)]]) +apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(features, single_categories + multiple_categories + apps)]]) if not apps_data.empty: apps_features = pd.DataFrame() if day_segment != "daily": @@ -52,14 +52,14 @@ if not apps_data.empty: # single category for sc in single_categories: if sc == "all": - apps_features = compute_metrics(apps_data, "all", metrics, apps_features) + apps_features = compute_features(apps_data, "all", features, apps_features) else: filtered_data = apps_data[apps_data["genre"].isin([sc])] - apps_features = compute_metrics(filtered_data, sc, metrics, apps_features) + apps_features = compute_features(filtered_data, sc, features, apps_features) # multiple category for mc in multiple_categories: filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])] - apps_features = compute_metrics(filtered_data, mc, metrics, apps_features) + apps_features = compute_features(filtered_data, mc, features, apps_features) # single apps for app in apps: col_name = app @@ -70,7 +70,7 @@ if not apps_data.empty: col_name = "top1global" filtered_data = apps_data[apps_data["package_name"].isin([app])] - apps_features = compute_metrics(filtered_data, col_name, metrics, apps_features) + apps_features = compute_features(filtered_data, col_name, features, apps_features) apps_features = apps_features.reset_index()