Add applications_foreground features
parent
770764ec8a
commit
0840235280
|
@ -41,6 +41,9 @@ rule all:
|
||||||
expand("data/processed/{pid}/accelerometer_{day_segment}.csv",
|
expand("data/processed/{pid}/accelerometer_{day_segment}.csv",
|
||||||
pid = config["PIDS"],
|
pid = config["PIDS"],
|
||||||
day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),
|
day_segment = config["ACCELEROMETER"]["DAY_SEGMENTS"]),
|
||||||
|
expand("data/processed/{pid}/applications_foreground_{day_segment}.csv",
|
||||||
|
pid = config["PIDS"],
|
||||||
|
day_segment = config["APPLICATIONS_FOREGROUND"]["DAY_SEGMENTS"]),
|
||||||
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
expand("data/raw/{pid}/fitbit_{fitbit_sensor}_with_datetime.csv",
|
||||||
pid=config["PIDS"],
|
pid=config["PIDS"],
|
||||||
fitbit_sensor=config["FITBIT_SENSORS"]),
|
fitbit_sensor=config["FITBIT_SENSORS"]),
|
||||||
|
|
11
config.yaml
11
config.yaml
|
@ -91,6 +91,17 @@ ACCELEROMETER:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
METRICS: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude", "ratioexertionalactivityepisodes", "sumexertionalactivityepisodes", "longestexertionalactivityepisode", "longestnonexertionalactivityepisode", "countexertionalactivityepisodes", "countnonexertionalactivityepisodes"]
|
||||||
|
|
||||||
|
APPLICATIONS_FOREGROUND:
|
||||||
|
DAY_SEGMENTS: *day_segments
|
||||||
|
SINGLE_CATEGORIES: ["all", "video"]
|
||||||
|
MULTIPLE_CATEGORIES:
|
||||||
|
social: ["socialnetworks", "socialmediatools"]
|
||||||
|
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]
|
||||||
|
SINGLE_APPS: ["top1global", "com.facebook.moments", "com.google.android.youtube", "com.twitter.android"] # There's no entropy for single apps
|
||||||
|
EXCLUDED_CATEGORIES: ["system_apps", "video"]
|
||||||
|
EXCLUDED_APPS: ["com.fitbit.FitbitMobile", "com.aware.plugin.upmc.cancer"]
|
||||||
|
METRICS: ["count", "timeoffirstuse", "timeoflastuse", "frequencyentropy"]
|
||||||
|
|
||||||
HEARTRATE:
|
HEARTRATE:
|
||||||
DAY_SEGMENTS: *day_segments
|
DAY_SEGMENTS: *day_segments
|
||||||
METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
METRICS: ["maxhr", "minhr", "avghr", "medianhr", "modehr", "stdhr", "diffmaxmodehr", "diffminmodehr", "entropyhr", "lengthoutofrange", "lengthfatburn", "lengthcardio", "lengthpeak"]
|
||||||
|
|
|
@ -131,6 +131,22 @@ rule accelerometer_metrics:
|
||||||
script:
|
script:
|
||||||
"../src/features/accelerometer_metrics.py"
|
"../src/features/accelerometer_metrics.py"
|
||||||
|
|
||||||
|
# Compute foreground-application features for one participant and one day segment.
# The feature script receives every APPLICATIONS_FOREGROUND setting from the config
# through `params`; `day_segment` is forwarded from the output wildcard.
rule applications_foreground_metrics:
    input:
        "data/interim/{pid}/applications_foreground_with_datetime_with_genre.csv"
    output:
        "data/processed/{pid}/applications_foreground_{day_segment}.csv"
    params:
        day_segment = "{day_segment}",
        # Categories/apps to compute features for
        single_categories = config["APPLICATIONS_FOREGROUND"]["SINGLE_CATEGORIES"],
        multiple_categories = config["APPLICATIONS_FOREGROUND"]["MULTIPLE_CATEGORIES"],
        single_apps = config["APPLICATIONS_FOREGROUND"]["SINGLE_APPS"],
        # Categories/apps to drop before any feature is computed
        excluded_categories = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_CATEGORIES"],
        excluded_apps = config["APPLICATIONS_FOREGROUND"]["EXCLUDED_APPS"],
        metrics = config["APPLICATIONS_FOREGROUND"]["METRICS"]
    script:
        "../src/features/applications_foreground_metrics.py"
|
||||||
|
|
||||||
rule fitbit_heartrate_metrics:
|
rule fitbit_heartrate_metrics:
|
||||||
input:
|
input:
|
||||||
"data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
|
"data/raw/{pid}/fitbit_heartrate_with_datetime.csv",
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import itertools
|
||||||
|
from scipy.stats import entropy
|
||||||
|
|
||||||
|
|
||||||
|
def compute_metrics(filtered_data, apps_type, metrics, apps_features):
|
||||||
|
if "count" in metrics:
|
||||||
|
apps_features["apps_" + day_segment + "_count" + apps_type] = filtered_data.groupby(["local_date"]).count()["timestamp"]
|
||||||
|
if "timeoffirstuse" in metrics:
|
||||||
|
time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||||
|
apps_features["apps_" + day_segment + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
||||||
|
if "timeoflastuse" in metrics:
|
||||||
|
time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_date", keep="first").set_index("local_date")
|
||||||
|
apps_features["apps_" + day_segment + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
||||||
|
if "frequencyentropy" in metrics:
|
||||||
|
apps_with_count = filtered_data.groupby(["local_date","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
|
||||||
|
apps_features["apps_" + day_segment + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_date")["timestamp"].agg(entropy)
|
||||||
|
return apps_features
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: reads the genre-annotated app events given by Snakemake, removes
# the excluded categories/apps, computes the configured metrics per category and
# per app for one day segment and writes them to the rule's output CSV.
apps_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"])
day_segment = snakemake.params["day_segment"]
single_categories = snakemake.params["single_categories"]
multiple_categories_with_genres = snakemake.params["multiple_categories"]
single_apps = snakemake.params["single_apps"]
excluded_categories = snakemake.params["excluded_categories"]
excluded_apps = snakemake.params["excluded_apps"]
metrics = snakemake.params["metrics"]

# Remove excluded entries while keeping the configured order (and dropping
# duplicates). The previous set arithmetic made the order of the output
# columns vary from run to run.
single_categories = [category for category in dict.fromkeys(single_categories) if category not in excluded_categories]
multiple_categories = [category for category in multiple_categories_with_genres if category not in excluded_categories]
apps = [app for app in dict.fromkeys(single_apps) if app not in excluded_apps]

# exclude categories in the excluded_categories list
if "system_apps" in excluded_categories:
    apps_data = apps_data[apps_data["is_system_app"] == 0]
apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)]
# exclude apps in the excluded_apps list. Single apps are selected by
# "package_name" below, so package names listed in excluded_apps must match
# here as well; matching "application_name" is kept for backward compatibility.
is_excluded_app = apps_data["application_name"].isin(excluded_apps) | apps_data["package_name"].isin(excluded_apps)
apps_data = apps_data[~is_excluded_app]

# deep copy the apps_data for the top1global computation
# (taken before the day segment filter below is applied)
apps_data_global = apps_data.copy()

# Header-only result written when there is no data left after the exclusions.
feature_suffixes = ["".join(pair) for pair in itertools.product(metrics, single_categories + multiple_categories + apps)]
apps_features = pd.DataFrame(columns=["local_date"] + ["apps_" + day_segment + "_" + suffix for suffix in feature_suffixes])

if not apps_data.empty:
    apps_features = pd.DataFrame()
    if day_segment != "daily":
        apps_data = apps_data[apps_data["local_day_segment"] == day_segment]

    # single category
    for sc in single_categories:
        if sc == "all":
            apps_features = compute_metrics(apps_data, "all", metrics, apps_features)
        else:
            filtered_data = apps_data[apps_data["genre"].isin([sc])]
            apps_features = compute_metrics(filtered_data, sc, metrics, apps_features)

    # multiple category
    for mc in multiple_categories:
        filtered_data = apps_data[apps_data["genre"].isin(multiple_categories_with_genres[mc])]
        apps_features = compute_metrics(filtered_data, mc, metrics, apps_features)

    # single apps
    for app in apps:
        # Features keep the configured name even when "top1global" is
        # resolved to a concrete package below.
        col_name = app
        if app == "top1global":
            # get the most used app
            # NOTE(review): counts are per (local_date, package_name) pair, so
            # this picks the package with the busiest single day - confirm
            # that this is the intended meaning of "global".
            apps_with_count = apps_data_global.groupby(["local_date", "package_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
            app = apps_with_count.iloc[0]["package_name"]

        filtered_data = apps_data[apps_data["package_name"].isin([app])]
        apps_features = compute_metrics(filtered_data, col_name, metrics, apps_features)

    # Name the index explicitly: when every feature is empty the frame never
    # receives a "local_date" index and reset_index() would emit "index".
    apps_features = apps_features.rename_axis("local_date").reset_index()

apps_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue