From e74c745f86e99557debe03fb26d442affb464166 Mon Sep 17 00:00:00 2001 From: JulioV Date: Tue, 22 Jun 2021 16:45:25 -0400 Subject: [PATCH] Add own categories to app foreground features --- config.yaml | 3 ++ .../features/phone-applications-foreground.md | 1 + .../rapids/main.py | 30 ++++++++++++------- tools/config.schema.yaml | 9 +++++- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/config.yaml b/config.yaml index 71a804c1..c1f13209 100644 --- a/config.yaml +++ b/config.yaml @@ -113,6 +113,9 @@ PHONE_APPLICATIONS_FOREGROUND: COMPUTE: False INCLUDE_EPISODE_FEATURES: False SINGLE_CATEGORIES: ["all", "email"] + OWN_CATEGORIES: + social_media: ['com.google.android.youtube','com.snapchat.android','com.instagram.android','com.zhiliaoapp.musically','com.facebook.katana'] + dating: ['com.tinder','com.relance.happycouple','com.kiwi.joyride'] MULTIPLE_CATEGORIES: social: ["socialnetworks", "socialmediatools"] entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] diff --git a/docs/features/phone-applications-foreground.md b/docs/features/phone-applications-foreground.md index 0ed73029..92b3e605 100644 --- a/docs/features/phone-applications-foreground.md +++ b/docs/features/phone-applications-foreground.md @@ -36,6 +36,7 @@ Parameters description for `[PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS]`: |`[INCLUDE_EPISODE_FEATURES]`| Set to `True` to extract application episode features as well from the `RAPIDS` provider| |`[FEATURES]` | Features to be computed, see table below |`[SINGLE_CATEGORIES]` | An array of app categories to be *included* in the feature extraction computation. The special keyword `all` represents a category with all the apps from each participant. 
By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) +|`[OWN_CATEGORIES]` | An array of collections representing your own app categories. The key of each element is the name of the in-house category and the value is an array of the package names (apps) included in that category. |`[MULTIPLE_CATEGORIES]` | An array of collections representing meta-categories (a group of categories). They key of each element is the name of the `meta-category` and the value is an array of member app categories. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) |`[SINGLE_APPS]` | An array of apps to be *included* in the feature extraction computation. Use their package name (e.g. `com.google.android.youtube`) or the reserved keyword `top1global` (the most used app by a participant over the whole monitoring study) |`[EXCLUDED_CATEGORIES]` | An array of app categories to be *excluded* from the feature extraction computation. 
By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) diff --git a/src/features/phone_applications_foreground/rapids/main.py b/src/features/phone_applications_foreground/rapids/main.py index a3ace2a2..c1d1f409 100644 --- a/src/features/phone_applications_foreground/rapids/main.py +++ b/src/features/phone_applications_foreground/rapids/main.py @@ -55,20 +55,26 @@ def compute_features(filtered_data, apps_type, requested_features, apps_features apps_features["sumduration" + apps_type] = np.nan else: apps_features["sumduration" + apps_type] = grouped_data - + apps_features.index.names = ['local_segment'] return apps_features def process_app_features(data, requested_features, time_segment, provider, filter_data_by_segment): excluded_categories = provider["EXCLUDED_CATEGORIES"] excluded_apps = provider["EXCLUDED_APPS"] - multiple_categories_with_genres = provider["MULTIPLE_CATEGORIES"] single_categories = provider["SINGLE_CATEGORIES"] - multiple_categories = provider["MULTIPLE_CATEGORIES"] + multiple_categories = {} + if isinstance(provider["MULTIPLE_CATEGORIES"], dict): + for mcategory_name, mcategory_content in provider["MULTIPLE_CATEGORIES"].items(): + if len(mcategory_content) > 0 and mcategory_name not in excluded_categories: + multiple_categories[mcategory_name] = mcategory_content + own_categories = {} + if isinstance(provider["OWN_CATEGORIES"], dict): + for owncategory_name, owncategory_content in provider["OWN_CATEGORIES"].items(): + if len(owncategory_content) > 0 and owncategory_name not in excluded_categories: + own_categories[owncategory_name] = owncategory_content single_apps = provider["SINGLE_APPS"] - single_categories = list(set(single_categories) - set(excluded_categories)) - multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories)) single_apps = list(set(single_apps) - set(excluded_apps)) # exclude categories in the 
excluded_categories list @@ -78,7 +84,7 @@ def process_app_features(data, requested_features, time_segment, provider, filte # exclude apps in the excluded_apps list data = data[~data["package_name"].isin(excluded_apps)] - features = pd.DataFrame(columns=["local_segment"] + ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]) + features = pd.DataFrame(columns=["local_segment"] + ["".join(feature) for feature in itertools.product(requested_features, single_categories + list(own_categories.keys()) + list(multiple_categories.keys()) + single_apps)]) if not data.empty: # deep copy the data for the top1global computation data_global = data.copy() @@ -95,10 +101,14 @@ def process_app_features(data, requested_features, time_segment, provider, filte else: filtered_data = data[data["genre"].isin([sc])] features = compute_features(filtered_data, sc, requested_features, features, time_segment) - # multiple category - for mc in multiple_categories: - filtered_data = data[data["genre"].isin(multiple_categories_with_genres[mc])] - features = compute_features(filtered_data, mc, requested_features, features, time_segment) + # own categories + for owncategory_name, owncategory_content in own_categories.items(): + filtered_data = data[data["package_name"].isin(owncategory_content)] + features = compute_features(filtered_data, owncategory_name, requested_features, features, time_segment) + # multiple categories + for mcategory_name, mcategory_content in multiple_categories.items(): + filtered_data = data[data["genre"].isin(mcategory_content)] + features = compute_features(filtered_data, mcategory_name, requested_features, features, time_segment) # single apps for app in single_apps: col_name = app diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index d1c30054..7fc372a0 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -346,8 +346,15 @@ properties: uniqueItems: True items: 
type: string + OWN_CATEGORIES: + type: ["null", object] + additionalProperties: + type: array + uniqueItems: True + items: + type: string MULTIPLE_CATEGORIES: - type: object + type: ["null", object] additionalProperties: type: array uniqueItems: True