rapids/src/features/applications_foreground_fea...

37 lines
2.0 KiB
Python

import pandas as pd
from applications_foreground.applications_foreground_base import base_applications_foreground_features
# Load the foreground-applications CSV named by the first Snakemake input.
# NOTE(review): `snakemake` is not defined in this file; presumably it is the
# object Snakemake injects into scripts run through a rule's `script:` directive.
apps_data = pd.read_csv(
    snakemake.input[0],
    encoding="ISO-8859-1",
    parse_dates=["local_date_time", "local_date"],
)

# Unpack the rule parameters that drive the feature extraction below.
rule_params = snakemake.params
day_segment = rule_params["day_segment"]
single_categories = rule_params["single_categories"]
multiple_categories_with_genres = rule_params["multiple_categories"]
single_apps = rule_params["single_apps"]
excluded_categories = rule_params["excluded_categories"]
excluded_apps = rule_params["excluded_apps"]
requested_features = rule_params["features"]
# Start from an empty frame that only carries the "local_date" key column.
apps_features = pd.DataFrame(columns=["local_date"])

# Remove excluded entries from the requested categories and apps.
# The filters keep the configured order (and drop duplicates) instead of going
# through `list(set(...) - set(...))`: set iteration order for strings changes
# between interpreter runs (hash randomization), which would make these lists,
# and anything built by iterating them, non-reproducible.
excluded_category_set = set(excluded_categories)
excluded_app_set = set(excluded_apps)
single_categories = [
    category
    for category in dict.fromkeys(single_categories)
    if category not in excluded_category_set
]
# Iterating the mapping yields its keys (the multi-genre category names).
multiple_categories = [
    category
    for category in multiple_categories_with_genres
    if category not in excluded_category_set
]
apps = [app for app in dict.fromkeys(single_apps) if app not in excluded_app_set]

# Number of distinct category/app "types" remaining after the exclusions.
type_count = len(single_categories) + len(multiple_categories) + len(apps)

# Bundle of settings handed to the feature-extraction function.
params = {
    "multiple_categories_with_genres": multiple_categories_with_genres,
    "single_categories": single_categories,
    "multiple_categories": multiple_categories,
    "apps": apps,
}
# Exclude categories listed in excluded_categories.
# "system_apps" is handled separately: when it is listed, rows whose
# "is_system_app" flag is not 0 are dropped.
if "system_apps" in excluded_categories:
    apps_data = apps_data[apps_data["is_system_app"] == 0]
# The genre filter always runs, whether or not "system_apps" is listed.
apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)]

# Exclude apps listed in excluded_apps (matched on "package_name").
apps_data = apps_data[~apps_data["package_name"].isin(excluded_apps)]
# Compute the requested features and outer-merge them into the output frame on "local_date".
apps_features = apps_features.merge(
    base_applications_foreground_features(apps_data, day_segment, requested_features, params),
    on="local_date",
    how="outer",
)

# Sanity check on the output width: one column per (requested feature, type)
# pair plus the "local_date" column. The message reports the value that is
# actually checked (features * types + 1).
expected_columns = len(requested_features) * type_count + 1
assert expected_columns == apps_features.shape[1], (
    "The number of features in the output dataframe (={}) does not match the expected value "
    "(={} * {} + 1 = {}). Verify your application foreground feature extraction functions"
).format(apps_features.shape[1], len(requested_features), type_count, expected_columns)

# Write the features to the first Snakemake output path, without the index column.
apps_features.to_csv(snakemake.output[0], index=False)