# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.11.4
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

# %%
# Explore missing application categories.
#
# Provenance: exploration/expl_app_categories.py, added in commit
# a06ad0800f435d7e475c30d0e8740599b0826103 (junos, Mon, 9 Aug 2021).
#
# The notebook loads the app-category table, looks at the rows whose
# "play_store_genre" is "not_found", and lists the ones whose package name
# does not match any of the OS / manufacturer name fragments defined below.

# %matplotlib inline
import os
import re
import sys

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Make the parent of the current working directory importable so that the
# project-level modules imported in the next cell can be resolved.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# %%
# These imports must stay below the sys.path adjustment above.
from config.models import AppCategories, Participant
from setup import db_engine, session

# %%
# Read the whole AppCategories table into a DataFrame.
query_app_categories = session.query(AppCategories)
with db_engine.connect() as connection:
    df_app_categories = pd.read_sql(query_app_categories.statement, connection)

# %%
df_app_categories.head()

# %%
# Frequency of each genre value, including the "not_found" marker.
df_app_categories["play_store_genre"].value_counts()

# %%
# Keep only the apps for which no genre was found.
df_category_not_found = df_app_categories[
    df_app_categories["play_store_genre"] == "not_found"
]

# %%
df_category_not_found["play_store_response"].value_counts()

# %%
df_category_not_found["package_name"].value_counts()

# %%
# Name fragments used to flag package names as OS / manufacturer related.
manufacturers = [
    "samsung",
    "oneplus",
    "huawei",
    "xiaomi",
    "lge",
    "motorola",
    "miui",
    "lenovo",
    "oppo",
    "mediatek",
]
custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
other = ["android", "wssyncmldm"]

# The fragments are literal substrings, not regular expressions: escape them
# so that e.g. the "." in "foundation.e" does not match an arbitrary character.
os_package_pattern = "|".join(
    re.escape(name) for name in manufacturers + custom_rom + other
)

# na=False keeps the mask strictly boolean even if a package name is missing,
# so that it can be safely inverted and used for row selection below.
rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
    os_package_pattern, case=False, na=False
)

# %%
# Show every remaining row and column without truncation.
# NOTE(review): `display` is not imported in this file; it is expected to be
# provided by the Jupyter/IPython kernel namespace - confirm before running
# this as a plain script.
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df_category_not_found.loc[~rows_os_manufacturer])