diff --git a/exploration/expl_app_categories.py b/exploration/expl_app_categories.py
index c5cf744..1163cf4 100644
--- a/exploration/expl_app_categories.py
+++ b/exploration/expl_app_categories.py
@@ -33,7 +33,7 @@ from setup import db_engine, session
 # %%
 query_app_categories = session.query(AppCategories)
 with db_engine.connect() as connection:
-    df_app_categories = pd.read_sql(query_app_categories.statement, connection)
+    df_app_categories = pd.read_sql(query_app_categories.statement, connection)
 
 # %%
 df_app_categories.head()
@@ -42,7 +42,9 @@ df_app_categories.head()
 df_app_categories["play_store_genre"].value_counts()
 
 # %%
-df_category_not_found = df_app_categories[df_app_categories["play_store_genre"] == "not_found"]
+df_category_not_found = df_app_categories[
+    df_app_categories["play_store_genre"] == "not_found"
+]
 
 # %%
 df_category_not_found["play_store_response"].value_counts()
@@ -51,13 +53,24 @@ df_category_not_found["play_store_response"].value_counts()
 df_category_not_found["package_name"].value_counts()
 
 # %%
-manufacturers = ["samsung","oneplus","huawei","xiaomi","lge","motorola","miui","lenovo","oppo","mediatek"]
-custom_rom = ["coloros","lineageos","myos","cyanogenmod", "foundation.e"]
+manufacturers = [
+    "samsung",
+    "oneplus",
+    "huawei",
+    "xiaomi",
+    "lge",
+    "motorola",
+    "miui",
+    "lenovo",
+    "oppo",
+    "mediatek",
+]
+custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
 other = ["android", "wssyncmldm"]
-rows_os_manufacturer = df_category_not_found["package_name"].str.contains("|".join(manufacturers + custom_rom + other), case=False)
+rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
+    "|".join(manufacturers + custom_rom + other), case=False
+)
 
 # %%
-with pd.option_context(
-    "display.max_rows", None, "display.max_columns", None
-):
+with pd.option_context("display.max_rows", None, "display.max_columns", None):
     display(df_category_not_found.loc[~rows_os_manufacturer])
diff --git a/features/proximity.py b/features/proximity.py
index c48621c..f14589b 100644
--- a/features/proximity.py
+++ b/features/proximity.py
@@ -28,3 +28,31 @@ def get_proximity_data(usernames: Collection) -> pd.DataFrame:
     with db_engine.connect() as connection:
         df_proximity = pd.read_sql(query_proximity.statement, connection)
     return df_proximity
+
+
+def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
+    """
+    Recode proximity readings from a double to a boolean value.
+
+    Different proximity sensors report different values,
+    but in our data only several distinct values have ever been found.
+    These are therefore converted into "near" and "far" binary values.
+    See expl_proximity.ipynb for additional info.
+
+    Parameters
+    ----------
+    df_proximity: pd.DataFrame
+        A dataframe of proximity data with a double_proximity column.
+
+    Returns
+    -------
+    df_proximity: pd.DataFrame
+        A copy of the input dataframe with an additional column bool_prox_near,
+        indicating whether "near" proximity was reported,
+        i.e. whether double_proximity equals 0.
+        False values correspond to "far" reported by this sensor.
+        Missing readings compare unequal to 0 and are therefore also False.
+
+    """
+    # assign() returns a new dataframe, so the caller's object is not modified.
+    return df_proximity.assign(bool_prox_near=df_proximity["double_proximity"] == 0)