Refactor applications_foreground features: replace "metrics" with "features"
Co-authored-by: Meng Li <AnnieLM1996@gmail.com>
pull/95/head
parent
d1c38016de
commit
f46e8c0666
|
@ -109,7 +109,7 @@ APPLICATIONS_FOREGROUND:
|
||||||
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
|
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
|
||||||
EXCLUDED_CATEGORIES: ["system_apps", "video"]
|
EXCLUDED_CATEGORIES: ["system_apps", "video"]
|
||||||
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
|
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
|
||||||
METRICS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
FEATURES: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||||
|
|
||||||
HEARTRATE:
|
HEARTRATE:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
|
|
|
@ -451,7 +451,7 @@ See `Applications Foreground Config Code`_
|
||||||
|
|
||||||
.. - Genre categorization of Applications Foreground dataset: ``expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", pid=config["PIDS"]),``
|
.. - Genre categorization of Applications Foreground dataset: ``expand("data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv", pid=config["PIDS"]),``
|
||||||
|
|
||||||
- Extract Applications Foreground Metrics:
|
- Extract Applications Foreground Features:
|
||||||
|
|
||||||
| ``expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",``
|
| ``expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",``
|
||||||
| ``pid=config["PIDS"],``
|
| ``pid=config["PIDS"],``
|
||||||
|
@ -471,9 +471,9 @@ See `Applications Foreground Config Code`_
|
||||||
|
|
||||||
- **Script:** ``../src/data/application_genres.R`` - See the application_genres.R_ script
|
- **Script:** ``../src/data/application_genres.R`` - See the application_genres.R_ script
|
||||||
|
|
||||||
- **Rule:** ``rules/features.snakefile/applications_foreground_metrics`` - See the applications_foreground_metrics_ rule.
|
- **Rule:** ``rules/features.snakefile/applications_foreground_features`` - See the applications_foreground_features_ rule.
|
||||||
|
|
||||||
- **Script:** ``src/features/applications_foreground_metrics.py`` - See the applications_foreground_metrics.py_ script.
|
- **Script:** ``src/features/applications_foreground_features.py`` - See the applications_foreground_features.py_ script.
|
||||||
|
|
||||||
.. _applications-foreground-parameters:
|
.. _applications-foreground-parameters:
|
||||||
|
|
||||||
|
@ -488,14 +488,14 @@ multiple_categories Categories of apps that will be included for the data c
|
||||||
single_apps Any Android app can be included in the list of apps used to collect data by adding the package name to this list. (E.g. Youtube)
|
single_apps Any Android app can be included in the list of apps used to collect data by adding the package name to this list. (E.g. Youtube)
|
||||||
excluded_categories Categories of apps that will be excluded for the data collection. The available categories can be defined in the ``APPLICATION_GENRES`` in the ``config`` file. See :ref:`Assumptions and Observations <applications-foreground-observations>`.
|
excluded_categories Categories of apps that will be excluded for the data collection. The available categories can be defined in the ``APPLICATION_GENRES`` in the ``config`` file. See :ref:`Assumptions and Observations <applications-foreground-observations>`.
|
||||||
excluded_apps Any Android app can be excluded from the list of apps used to collect data by adding the package name to this list.
|
excluded_apps Any Android app can be excluded from the list of apps used to collect data by adding the package name to this list.
|
||||||
metrics The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Metrics <applications-foreground-available-metrics>` Table below
|
features The different measures that can be retrieved from the dataset. See :ref:`Available Applications Foreground Features <applications-foreground-available-features>` Table below
|
||||||
==================== ===================
|
==================== ===================
|
||||||
|
|
||||||
.. _applications-foreground-available-metrics:
|
.. _applications-foreground-available-features:
|
||||||
|
|
||||||
**Available Applications Foreground Metrics**
|
**Available Applications Foreground Features**
|
||||||
|
|
||||||
The following table shows a list of the available metrics for the Applications Foreground dataset
|
The following table shows a list of the available features for the Applications Foreground dataset
|
||||||
|
|
||||||
================== ========= =============
|
================== ========= =============
|
||||||
Name Units Description
|
Name Units Description
|
||||||
|
@ -1162,8 +1162,8 @@ stddurationactivebout minutes Std duration active bout: The standard
|
||||||
.. _`Application Genres Config`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L54
|
.. _`Application Genres Config`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L54
|
||||||
.. _application_genres: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L81
|
.. _application_genres: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/preprocessing.snakefile#L81
|
||||||
.. _application_genres.R: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/src/data/application_genres.R
|
.. _application_genres.R: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/src/data/application_genres.R
|
||||||
.. _applications_foreground_metrics: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135
|
.. _applications_foreground_features: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L135
|
||||||
.. _applications_foreground_metrics.py: https://github.com/carissalow/rapids/blob/master/src/features/accelerometer_metrics.py
|
.. _applications_foreground_features.py: https://github.com/carissalow/rapids/blob/master/src/features/applications_foreground_features.py
|
||||||
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L84
|
.. _`Battery Config Code`: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/config.yaml#L84
|
||||||
.. _battery_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L25
|
.. _battery_deltas: https://github.com/carissalow/rapids/blob/765bb462636d5029a05f54d4c558487e3786b90b/rules/features.snakefile#L25
|
||||||
.. _battery_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/battery_deltas.R
|
.. _battery_deltas.R: https://github.com/carissalow/rapids/blob/master/src/features/battery_deltas.R
|
||||||
|
|
|
@ -133,7 +133,7 @@ rule accelerometer_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/accelerometer_features.py"
|
"../src/features/accelerometer_features.py"
|
||||||
|
|
||||||
rule applications_foreground_metrics:
|
rule applications_foreground_features:
|
||||||
input:
|
input:
|
||||||
"data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
|
"data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
|
||||||
params:
|
params:
|
||||||
|
@ -143,11 +143,11 @@ rule applications_foreground_metrics:
|
||||||
single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"],
|
single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"],
|
||||||
excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"],
|
excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"],
|
||||||
excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"],
|
excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"],
|
||||||
metrics = config["APPLICATIONS_FOREGROUND"]["METRICS"],
|
features = config["APPLICATIONS_FOREGROUND"]["FEATURES"],
|
||||||
output:
|
output:
|
||||||
"data/processed/{pid}/applications_foreground_{day_segment}.csv"
|
"data/processed/{pid}/applications_foreground_{day_segment}.csv"
|
||||||
script:
|
script:
|
||||||
"../src/features/applications_foreground_metrics.py"
|
"../src/features/applications_foreground_features.py"
|
||||||
|
|
||||||
rule fitbit_heartrate_metrics:
|
rule fitbit_heartrate_metrics:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -4,17 +4,17 @@ import itertools
|
||||||
from scipy.stats import entropy
|
from scipy.stats import entropy
|
||||||
|
|
||||||
|
|
||||||
def compute_metrics(filtered_data, apps_type, metrics, apps_features):
|
def compute_features(filtered_data, apps_type, requested_features, apps_features):
|
||||||
if "timeoffirstuse" in metrics:
|
if "timeoffirstuse" in requested_features:
|
||||||
time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||||
apps_features["apps_" + day_segment + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
apps_features["apps_" + day_segment + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
||||||
if "timeoflastuse" in metrics:
|
if "timeoflastuse" in requested_features:
|
||||||
time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||||
apps_features["apps_" + day_segment + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
apps_features["apps_" + day_segment + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
||||||
if "frequencyentropy" in metrics:
|
if "frequencyentropy" in requested_features:
|
||||||
apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
|
apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
|
||||||
apps_features["apps_" + day_segment + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_date")["timestamp"].agg(entropy)
|
apps_features["apps_" + day_segment + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_date")["timestamp"].agg(entropy)
|
||||||
if "count" in metrics:
|
if "count" in requested_features:
|
||||||
apps_features["apps_" + day_segment + "_count" + apps_type] = filtered_data.groupby(["local_date"]).count()["timestamp"]
|
apps_features["apps_" + day_segment + "_count" + apps_type] = filtered_data.groupby(["local_date"]).count()["timestamp"]
|
||||||
apps_features.fillna(value={"apps_" + day_segment + "_count" + apps_type: 0}, inplace=True)
|
apps_features.fillna(value={"apps_" + day_segment + "_count" + apps_type: 0}, inplace=True)
|
||||||
return apps_features
|
return apps_features
|
||||||
|
@ -27,7 +27,7 @@ multiple_categories_with_genres = snakemake.params["multiple_categories"]
|
||||||
single_apps = snakemake.params["single_apps"]
|
single_apps = snakemake.params["single_apps"]
|
||||||
excluded_categories = snakemake.params["excluded_categories"]
|
excluded_categories = snakemake.params["excluded_categories"]
|
||||||
excluded_apps = snakemake.params["excluded_apps"]
|
excluded_apps = snakemake.params["excluded_apps"]
|
||||||
metrics = snakemake.params["metrics"]
|
features = snakemake.params["features"]
|
||||||
|
|
||||||
single_categories = list(set(single_categories) - set(excluded_categories))
|
single_categories = list(set(single_categories) - set(excluded_categories))
|
||||||
multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories))
|
multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories))
|
||||||
|
@ -43,7 +43,7 @@ apps_data = apps_data[~apps_data["application_name"].isin(excluded_apps)]
|
||||||
# deep copy the apps_data for the top1global computation
|
# deep copy the apps_data for the top1global computation
|
||||||
apps_data_global = apps_data.copy()
|
apps_data_global = apps_data.copy()
|
||||||
|
|
||||||
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(metric) for metric in itertools.product(metrics, single_categories + multiple_categories + apps)]])
|
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(feature) for feature in itertools.product(features, single_categories + multiple_categories + apps)]])
|
||||||
if not apps_data.empty:
|
if not apps_data.empty:
|
||||||
apps_features = pd.DataFrame()
|
apps_features = pd.DataFrame()
|
||||||
if day_segment != "daily":
|
if day_segment != "daily":
|
||||||
|
@ -52,14 +52,14 @@ if not apps_data.empty:
|
||||||
# single category
|
# single category
|
||||||
for sc in single_categories:
|
for sc in single_categories:
|
||||||
if sc == "all":
|
if sc == "all":
|
||||||
apps_features = compute_metrics(apps_data, "all", metrics, apps_features)
|
apps_features = compute_features(apps_data, "all", features, apps_features)
|
||||||
else:
|
else:
|
||||||
filtered_data = apps_data[apps_data["genre"].isin([sc])]
|
filtered_data = apps_data[apps_data["genre"].isin([sc])]
|
||||||
apps_features = compute_metrics(filtered_data, sc, metrics, apps_features)
|
apps_features = compute_features(filtered_data, sc, features, apps_features)
|
||||||
# multiple category
|
# multiple category
|
||||||
for mc in multiple_categories:
|
for mc in multiple_categories:
|
||||||
filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])]
|
filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])]
|
||||||
apps_features = compute_metrics(filtered_data, mc, metrics, apps_features)
|
apps_features = compute_features(filtered_data, mc, features, apps_features)
|
||||||
# single apps
|
# single apps
|
||||||
for app in apps:
|
for app in apps:
|
||||||
col_name = app
|
col_name = app
|
||||||
|
@ -70,7 +70,7 @@ if not apps_data.empty:
|
||||||
col_name = "top1global"
|
col_name = "top1global"
|
||||||
|
|
||||||
filtered_data = apps_data[apps_data["package_name"].isin([app])]
|
filtered_data = apps_data[apps_data["package_name"].isin([app])]
|
||||||
apps_features = compute_metrics(filtered_data, col_name, metrics, apps_features)
|
apps_features = compute_features(filtered_data, col_name, features, apps_features)
|
||||||
|
|
||||||
apps_features = apps_features.reset_index()
|
apps_features = apps_features.reset_index()
|
||||||
|
|
Loading…
Reference in New Issue