Add own categories to app foreground features

pull/149/head
JulioV 2021-06-22 16:45:25 -04:00 committed by Weiyu
parent 5892b6d838
commit e74c745f86
4 changed files with 32 additions and 11 deletions

View File

@ -113,6 +113,9 @@ PHONE_APPLICATIONS_FOREGROUND:
COMPUTE: False COMPUTE: False
INCLUDE_EPISODE_FEATURES: False INCLUDE_EPISODE_FEATURES: False
SINGLE_CATEGORIES: ["all", "email"] SINGLE_CATEGORIES: ["all", "email"]
OWN_CATEGORIES:
social_media: ['com.google.android.youtube','com.snapchat.android','com.instagram.android','com.zhiliaoapp.musically','com.facebook.katana']
dating: ['com.tinder','com.relance.happycouple','com.kiwi.joyride']
MULTIPLE_CATEGORIES: MULTIPLE_CATEGORIES:
social: ["socialnetworks", "socialmediatools"] social: ["socialnetworks", "socialmediatools"]
entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"] entertainment: ["entertainment", "gamingknowledge", "gamingcasual", "gamingadventure", "gamingstrategy", "gamingtoolscommunity", "gamingroleplaying", "gamingaction", "gaminglogic", "gamingsports", "gamingsimulation"]

View File

@ -36,6 +36,7 @@ Parameters description for `[PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS]`:
|`[INCLUDE_EPISODE_FEATURES]`| Set to `True` to extract application episode features as well from the `RAPIDS` provider| |`[INCLUDE_EPISODE_FEATURES]`| Set to `True` to extract application episode features as well from the `RAPIDS` provider|
|`[FEATURES]` | Features to be computed, see table below |`[FEATURES]` | Features to be computed, see table below
|`[SINGLE_CATEGORIES]` | An array of app categories to be *included* in the feature extraction computation. The special keyword `all` represents a category with all the apps from each participant. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) |`[SINGLE_CATEGORIES]` | An array of app categories to be *included* in the feature extraction computation. The special keyword `all` represents a category with all the apps from each participant. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above)
|`[OWN_CATEGORIES]` | An array of collections representing your own app categories. The key of each element is the name of the in-house category and the value is an array of the package names (apps) included in that category.
|`[MULTIPLE_CATEGORIES]` | An array of collections representing meta-categories (a group of categories). The key of each element is the name of the `meta-category` and the value is an array of member app categories. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) |`[MULTIPLE_CATEGORIES]` | An array of collections representing meta-categories (a group of categories). The key of each element is the name of the `meta-category` and the value is an array of member app categories. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above)
|`[SINGLE_APPS]` | An array of apps to be *included* in the feature extraction computation. Use their package name (e.g. `com.google.android.youtube`) or the reserved keyword `top1global` (the most used app by a participant over the whole monitoring study) |`[SINGLE_APPS]` | An array of apps to be *included* in the feature extraction computation. Use their package name (e.g. `com.google.android.youtube`) or the reserved keyword `top1global` (the most used app by a participant over the whole monitoring study)
|`[EXCLUDED_CATEGORIES]` | An array of app categories to be *excluded* from the feature extraction computation. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above) |`[EXCLUDED_CATEGORIES]` | An array of app categories to be *excluded* from the feature extraction computation. By default we use the category catalogue pointed by `[APPLICATION_CATEGORIES][CATALOGUE_FILE]` (see the Sensor parameters description table above)

View File

@ -55,20 +55,26 @@ def compute_features(filtered_data, apps_type, requested_features, apps_features
apps_features["sumduration" + apps_type] = np.nan apps_features["sumduration" + apps_type] = np.nan
else: else:
apps_features["sumduration" + apps_type] = grouped_data apps_features["sumduration" + apps_type] = grouped_data
apps_features.index.names = ['local_segment']
return apps_features return apps_features
def process_app_features(data, requested_features, time_segment, provider, filter_data_by_segment): def process_app_features(data, requested_features, time_segment, provider, filter_data_by_segment):
excluded_categories = provider["EXCLUDED_CATEGORIES"] excluded_categories = provider["EXCLUDED_CATEGORIES"]
excluded_apps = provider["EXCLUDED_APPS"] excluded_apps = provider["EXCLUDED_APPS"]
multiple_categories_with_genres = provider["MULTIPLE_CATEGORIES"]
single_categories = provider["SINGLE_CATEGORIES"] single_categories = provider["SINGLE_CATEGORIES"]
multiple_categories = provider["MULTIPLE_CATEGORIES"] multiple_categories = {}
if isinstance(provider["MULTIPLE_CATEGORIES"], dict):
for mcategory_name, mcategory_content in provider["MULTIPLE_CATEGORIES"].items():
if len(mcategory_content) > 0 and mcategory_name not in excluded_categories:
multiple_categories[mcategory_name] = mcategory_content
own_categories = {}
if isinstance(provider["OWN_CATEGORIES"], dict):
for owncategory_name, owncategory_content in provider["OWN_CATEGORIES"].items():
if len(owncategory_content) > 0 and owncategory_name not in excluded_categories:
own_categories[owncategory_name] = owncategory_content
single_apps = provider["SINGLE_APPS"] single_apps = provider["SINGLE_APPS"]
single_categories = list(set(single_categories) - set(excluded_categories)) single_categories = list(set(single_categories) - set(excluded_categories))
multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories))
single_apps = list(set(single_apps) - set(excluded_apps)) single_apps = list(set(single_apps) - set(excluded_apps))
# exclude categories in the excluded_categories list # exclude categories in the excluded_categories list
@ -78,7 +84,7 @@ def process_app_features(data, requested_features, time_segment, provider, filte
# exclude apps in the excluded_apps list # exclude apps in the excluded_apps list
data = data[~data["package_name"].isin(excluded_apps)] data = data[~data["package_name"].isin(excluded_apps)]
features = pd.DataFrame(columns=["local_segment"] + ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]) features = pd.DataFrame(columns=["local_segment"] + ["".join(feature) for feature in itertools.product(requested_features, single_categories + list(own_categories.keys()) + list(multiple_categories.keys()) + single_apps)])
if not data.empty: if not data.empty:
# deep copy the data for the top1global computation # deep copy the data for the top1global computation
data_global = data.copy() data_global = data.copy()
@ -95,10 +101,14 @@ def process_app_features(data, requested_features, time_segment, provider, filte
else: else:
filtered_data = data[data["genre"].isin([sc])] filtered_data = data[data["genre"].isin([sc])]
features = compute_features(filtered_data, sc, requested_features, features, time_segment) features = compute_features(filtered_data, sc, requested_features, features, time_segment)
# multiple category # own categories
for mc in multiple_categories: for owncategory_name, owncategory_content in own_categories.items():
filtered_data = data[data["genre"].isin(multiple_categories_with_genres[mc])] filtered_data = data[data["package_name"].isin(owncategory_content)]
features = compute_features(filtered_data, mc, requested_features, features, time_segment) features = compute_features(filtered_data, owncategory_name, requested_features, features, time_segment)
# multiple categories
for mcategory_name, mcategory_content in multiple_categories.items():
filtered_data = data[data["genre"].isin(mcategory_content)]
features = compute_features(filtered_data, mcategory_name, requested_features, features, time_segment)
# single apps # single apps
for app in single_apps: for app in single_apps:
col_name = app col_name = app

View File

@ -346,8 +346,15 @@ properties:
uniqueItems: True uniqueItems: True
items: items:
type: string type: string
OWN_CATEGORIES:
type: ["null", object]
additionalProperties:
type: array
uniqueItems: True
items:
type: string
MULTIPLE_CATEGORIES: MULTIPLE_CATEGORIES:
type: object type: ["null", object]
additionalProperties: additionalProperties:
type: array type: array
uniqueItems: True uniqueItems: True