Refactor applications_foreground features: replace "metrics" with "features"
Co-authored-by: Meng Li <AnnieLM1996@gmail.com>pull/95/head
parent
d1c38016de
commit
f46e8c0666
|
@ -109,7 +109,7 @@ APPLICATIONS_FOREGROUND:
|
|||
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
|
||||
EXCLUDED_CATEGORIES: ["system_apps", "video"]
|
||||
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
|
||||
METRICS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||
|
||||
HEARTRATE:
|
||||
DAY_SEGMENTS: *day_segments
|
||||
|
|
|
@ -451,7 +451,7 @@ See `Applications Foreground Config Code`_
|
|||
|
||||
.. - Genre categorization of Applications Foreground dataset: ``expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", pid=config["PIDS"]),``
|
||||
|
||||
- Extract Applications Foreground Metrics:
|
||||
- Extract Applications Foreground Features:
|
||||
|
||||
| ``expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",``
|
||||
| ``pid=config["PIDS"],``
|
||||
|
@ -471,9 +471,9 @@ See `Applications Foreground Config Code`_
|
|||
|
||||
- **Script:** ``../src/data/application_genres.R`` - See the application_genres.R_ script
|
||||
|
||||
- **Rule:** ``rules/features.snakefile/applications_foreground_metrics`` - See the applications_foreground_metrics_ rule.
|
||||
- **Rule:** ``rules/features.snakefile/applications_foreground_features`` - See the applications_foreground_features_ rule.
|
||||
|
||||
- **Script:** ``src/features/applications_foreground_metrics.py`` - See the applications_foreground_metrics.py_ script.
|
||||
- **Script:** ``src/features/applications_foreground_features.py`` - See the applications_foreground_features.py_ script.
|
||||
|
||||
.. _applications-foreground-parameters:
|
||||
|
||||
|
@ -488,14 +488,14 @@ multiple_categories Categories of apps that will be included for the data c
|
|||
single_apps Any Android app can be included in the list of apps used to collect data by adding the package name to this list. (E.g. Youtube)
|
||||
excluded_categories Categories of apps that will be excluded for the data collection. The available categories can be defined in the ``APPLICATION_GENRES`` in the ``config`` file. See :ref:`Assumptions and Observations <applications-foreground-observations>`.
|
||||
excluded_apps Any Android app can be excluded from the list of apps used to collect data by adding the package name to this list.
|
||||
metrics The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Metrics <applications-foreground-available-metrics>` Table below
|
||||
features The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Features <applications-foreground-available-features>` Table below
|
||||
==================== ===================
|
||||
|
||||
.. _applications-foreground-available-metrics:
|
||||
.. _applications-foreground-available-features:
|
||||
|
||||
**Available Applications Foreground Metrics**
|
||||
**Available Applications Foreground Features**
|
||||
|
||||
The following table shows a list of the available metrics for the Applications Foreground dataset
|
||||
The following table shows a list of the available features for the Applications Foreground dataset
|
||||
|
||||
================== ========= =============
|
||||
Name Units Description
|
||||
|
@ -1162,8 +1162,8 @@ stddurationactivebout minutes Std duration active bout: The standard
|
|||
.. _`Application Genres Config`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L54
|
||||
.. _application_genres: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L81
|
||||
.. _application_genres.R: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/src/data/application_genres.R
|
||||
.. _applications_foreground_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135
|
||||
.. _applications_foreground_metrics.py: https://github.com/carissalow/rapids/blob/master/src/features/accelerometer_metrics.py
|
||||
.. _applications_foreground_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135
|
||||
.. _applications_foreground_features.py: https://github.com/carissalow/rapids/blob/master/src/features/applications_foreground_features.py
|
||||
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L84
|
||||
.. _battery_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L25
|
||||
.. _battery_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/battery_deltas.R
|
||||
|
|
|
@ -133,7 +133,7 @@ rule accelerometer_features:
|
|||
script:
|
||||
"../src/features/accelerometer_features.py"
|
||||
|
||||
rule applications_foreground_metrics:
|
||||
rule applications_foreground_features:
|
||||
input:
|
||||
"data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
|
||||
params:
|
||||
|
@ -143,11 +143,11 @@ rule applications_foreground_metrics:
|
|||
single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"],
|
||||
excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"],
|
||||
excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"],
|
||||
metrics = config["APPLICATIONS_FOREGROUND"]["METRICS"],
|
||||
features = config["APPLICATIONS_FOREGROUND"]["FEATURES"],
|
||||
output:
|
||||
"data/processed/{pid}/applications_foreground_{day_segment}.csv"
|
||||
script:
|
||||
"../src/features/applications_foreground_metrics.py"
|
||||
"../src/features/applications_foreground_features.py"
|
||||
|
||||
rule fitbit_heartrate_metrics:
|
||||
input:
|
||||
|
|
|
@ -4,17 +4,17 @@ import itertools
|
|||
from scipy.stats import entropy
|
||||
|
||||
|
||||
def compute_metrics(filtered_data, apps_type, metrics, apps_features):
|
||||
if "timeoffirstuse" in metrics:
|
||||
def compute_features(filtered_data, apps_type, requested_features, apps_features):
|
||||
if "timeoffirstuse" in requested_features:
|
||||
time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||
apps_features["apps_" + day_segment + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
||||
if "timeoflastuse" in metrics:
|
||||
if "timeoflastuse" in requested_features:
|
||||
time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||
apps_features["apps_" + day_segment + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
||||
if "frequencyentropy" in metrics:
|
||||
if "frequencyentropy" in requested_features:
|
||||
apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
|
||||
apps_features["apps_" + day_segment + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_date")["timestamp"].agg(entropy)
|
||||
if "count" in metrics:
|
||||
if "count" in requested_features:
|
||||
apps_features["apps_" + day_segment + "_count" + apps_type] = filtered_data.groupby(["local_date"]).count()["timestamp"]
|
||||
apps_features.fillna(value={"apps_" + day_segment + "_count" + apps_type: 0}, inplace=True)
|
||||
return apps_features
|
||||
|
@ -27,7 +27,7 @@ multiple_categories_with_genres = snakemake.params["multiple_categories"]
|
|||
single_apps = snakemake.params["single_apps"]
|
||||
excluded_categories = snakemake.params["excluded_categories"]
|
||||
excluded_apps = snakemake.params["excluded_apps"]
|
||||
metrics = snakemake.params["metrics"]
|
||||
features = snakemake.params["features"]
|
||||
|
||||
single_categories = list(set(single_categories) - set(excluded_categories))
|
||||
multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories))
|
||||
|
@ -43,7 +43,7 @@ apps_data = apps_data[~apps_data["application_name"].isin(excluded_apps)]
|
|||
# deep copy the apps_data for the top1global computation
|
||||
apps_data_global = apps_data.copy()
|
||||
|
||||
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(metric) for metric in itertools.product(metrics, single_categories + multiple_categories + apps)]])
|
||||
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(features, single_categories + multiple_categories + apps)]])
|
||||
if not apps_data.empty:
|
||||
apps_features = pd.DataFrame()
|
||||
if day_segment != "daily":
|
||||
|
@ -52,14 +52,14 @@ if not apps_data.empty:
|
|||
# single category
|
||||
for sc in single_categories:
|
||||
if sc == "all":
|
||||
apps_features = compute_metrics(apps_data, "all", metrics, apps_features)
|
||||
apps_features = compute_features(apps_data, "all", features, apps_features)
|
||||
else:
|
||||
filtered_data = apps_data[apps_data["genre"].isin([sc])]
|
||||
apps_features = compute_metrics(filtered_data, sc, metrics, apps_features)
|
||||
apps_features = compute_features(filtered_data, sc, features, apps_features)
|
||||
# multiple category
|
||||
for mc in multiple_categories:
|
||||
filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])]
|
||||
apps_features = compute_metrics(filtered_data, mc, metrics, apps_features)
|
||||
apps_features = compute_features(filtered_data, mc, features, apps_features)
|
||||
# single apps
|
||||
for app in apps:
|
||||
col_name = app
|
||||
|
@ -70,7 +70,7 @@ if not apps_data.empty:
|
|||
col_name = "top1global"
|
||||
|
||||
filtered_data = apps_data[apps_data["package_name"].isin([app])]
|
||||
apps_features = compute_metrics(filtered_data, col_name, metrics, apps_features)
|
||||
apps_features = compute_features(filtered_data, col_name, features, apps_features)
|
||||
|
||||
apps_features = apps_features.reset_index()
|
||||
|
Loading…
Reference in New Issue