From 0840235280bd87d0cad8c6b182992ec41b843650 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Fri, 7 Feb 2020 11:52:55 -0500
Subject: [PATCH] Add applications_foreground features

---
Review notes (this section is ignored by git am):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Context-line whitespace is a best guess; if a hunk does
  not apply cleanly, retry with `git apply --ignore-whitespace`.
- src/features/applications_foreground_metrics.py was revised during review,
  so the blob id on its index line no longer matches the content below.

 Snakefile                                     |  3 +
 config.yaml                                   | 11 +++
 rules/features.snakefile                      | 16 ++++
 .../applications_foreground_metrics.py        | 97 +++++++++++++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 src/features/applications_foreground_metrics.py

diff --git a/Snakefile b/Snakefile
index 3e1647e9..85a1a86e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -41,6 +41,9 @@ rule all:
         expand("data/processed/{pid}/accelerometer_{day_segment}.csv",
                             pid = config["PIDS"],
                             day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),
+        expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
+                            pid = config["PIDS"],
+                            day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
         expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
                             pid=config["PIDS"],
                             fitbit_sensor=config["FITBIT_SENSORS"]),
diff --git a/config.yaml b/config.yaml
index a30b8c4f..f4d07658 100644
--- a/config.yaml
+++ b/config.yaml
@@ -91,6 +91,17 @@ ACCELEROMETER:
   DAY_SEGMENTS: *day_segments
   METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
 
+APPLICATIONS_FOREGROUND:
+  DAY_SEGMENTS: *day_segments
+  SINGLE_CATEGORIES: ["all", "video"]
+  MULTIPLE_CATEGORIES:
+    social: ["socialnetworks", "socialmediatools"]
+    entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]
+  SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
+  EXCLUDED_CATEGORIES: ["system_apps", "video"]
+  EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
+  METRICS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
+
 HEARTRATE:
   DAY_SEGMENTS: *day_segments
   METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
diff --git a/rules/features.snakefile b/rules/features.snakefile
index 1681e915..c7218e85 100644
--- a/rules/features.snakefile
+++ b/rules/features.snakefile
@@ -131,6 +131,22 @@ rule accelerometer_metrics:
     script:
         "../src/features/accelerometer_metrics.py"
 
+rule applications_foreground_metrics:
+    input:
+        "data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv",
+    params:
+        day_segment = "{day_segment}",
+        single_categories = config["APPLICATIONS_FOREGROUND"]["SINGLE_CATEGORIES"],
+        multiple_categories = config["APPLICATIONS_FOREGROUND"]["MULTIPLE_CATEGORIES"],
+        single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"],
+        excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"],
+        excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"],
+        metrics = config["APPLICATIONS_FOREGROUND"]["METRICS"],
+    output:
+        "data/processed/{pid}/applications_foreground_{day_segment}.csv"
+    script:
+        "../src/features/applications_foreground_metrics.py"
+
 rule fitbit_heartrate_metrics:
     input:
         "data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
diff --git a/src/features/applications_foreground_metrics.py b/src/features/applications_foreground_metrics.py
new file mode 100644
index 00000000..f1ef68b7
--- /dev/null
+++ b/src/features/applications_foreground_metrics.py
@@ -0,0 +1,97 @@
+import pandas as pd
+import numpy as np
+import itertools
+from scipy.stats import entropy
+
+
+def compute_metrics(filtered_data, apps_type, metrics, apps_features):
+    """Add the requested per-day metrics of one app group to apps_features.
+
+    filtered_data: rows of the group; the timestamp, local_date, local_hour,
+        local_minute and application_name columns are read.
+    apps_type: suffix that identifies the group in the column names.
+    metrics: names of the metrics to compute.
+    apps_features: features computed so far, indexed by local_date.
+
+    Column names are built from the module-level day_segment, which must be
+    set before the first call. Returns the features with the new columns.
+    """
+    prefix = "apps_" + day_segment + "_"
+    new_features = []
+    if "count" in metrics:
+        new_features.append(filtered_data.groupby(["local_date"]).count()["timestamp"].rename(prefix + "count" + apps_type))
+    if "timeoffirstuse" in metrics:
+        time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
+        new_features.append((time_first_event["local_hour"] * 60 + time_first_event["local_minute"]).rename(prefix + "timeoffirstuse" + apps_type))
+    if "timeoflastuse" in metrics:
+        time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
+        new_features.append((time_last_event["local_hour"] * 60 + time_last_event["local_minute"]).rename(prefix + "timeoflastuse" + apps_type))
+    if "frequencyentropy" in metrics:
+        apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
+        new_features.append(apps_with_count.groupby("local_date")["timestamp"].agg(entropy).rename(prefix + "frequencyentropy" + apps_type))
+    if not new_features:
+        return apps_features
+    # Outer-align on local_date. Assigning the columns one at a time would
+    # reindex every later group to the days of the first group computed and
+    # silently drop the rest.
+    if len(apps_features.columns) > 0:
+        new_features = [apps_features] + new_features
+    apps_features = pd.concat(new_features, axis=1)
+    apps_features.index.name = "local_date"
+    return apps_features
+
+
+apps_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
+day_segment = snakemake.params["day_segment"]
+single_categories = snakemake.params["single_categories"]
+multiple_categories_with_genres = snakemake.params["multiple_categories"]
+single_apps = snakemake.params["single_apps"]
+excluded_categories = snakemake.params["excluded_categories"]
+excluded_apps = snakemake.params["excluded_apps"]
+metrics = snakemake.params["metrics"]
+
+# Drop the excluded entries but keep the configured order: a set difference
+# would shuffle the output columns from one run to the next.
+single_categories = [sc for sc in dict.fromkeys(single_categories) if sc not in excluded_categories]
+multiple_categories = [mc for mc in multiple_categories_with_genres if mc not in excluded_categories]
+apps = [app for app in dict.fromkeys(single_apps) if app not in excluded_apps]
+
+# exclude categories in the excluded_categories list
+if "system_apps" in excluded_categories:
+    apps_data = apps_data[apps_data["is_system_app"] == 0]
+apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)]
+# exclude apps in the excluded_apps list; like SINGLE_APPS it holds package
+# names, so it has to be matched against package_name
+apps_data = apps_data[~apps_data["package_name"].isin(excluded_apps)]
+
+# deep copy the apps_data for the top1global computation
+apps_data_global = apps_data.copy()
+
+if day_segment != "daily" and not apps_data.empty:
+    apps_data = apps_data[apps_data["local_day_segment"] == day_segment]
+
+if apps_data.empty:
+    # nothing to summarise for this day segment: write the header only
+    apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + x for x in ["".join(metric) for metric in itertools.product(metrics, single_categories + multiple_categories + apps)]])
+else:
+    apps_features = pd.DataFrame()
+    # single category
+    for sc in single_categories:
+        filtered_data = apps_data if sc == "all" else apps_data[apps_data["genre"].isin([sc])]
+        apps_features = compute_metrics(filtered_data, sc, metrics, apps_features)
+    # multiple category
+    for mc in multiple_categories:
+        filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])]
+        apps_features = compute_metrics(filtered_data, mc, metrics, apps_features)
+    # single apps
+    for app in apps:
+        col_name = app
+        if app == "top1global":
+            # the most used app over all days, not the busiest single day
+            app = apps_data_global.groupby("package_name")["timestamp"].count().idxmax()
+
+        filtered_data = apps_data[apps_data["package_name"].isin([app])]
+        apps_features = compute_metrics(filtered_data, col_name, metrics, apps_features)
+
+    apps_features = apps_features.sort_index().reset_index()
+
+apps_features.to_csv(snakemake.output[0], index=False)