2021-08-09 16:02:23 +02:00
|
|
|
# ---
|
|
|
|
# jupyter:
|
|
|
|
# jupytext:
|
|
|
|
# formats: ipynb,py:percent
|
|
|
|
# text_representation:
|
|
|
|
# extension: .py
|
|
|
|
# format_name: percent
|
|
|
|
# format_version: '1.3'
|
|
|
|
# jupytext_version: 1.11.4
|
|
|
|
# kernelspec:
|
|
|
|
# display_name: straw2analysis
|
|
|
|
# language: python
|
|
|
|
# name: straw2analysis
|
|
|
|
# ---
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# %matplotlib inline
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import pandas as pd
|
|
|
|
import seaborn as sns
|
|
|
|
|
|
|
|
# Make the parent of the current working directory importable, so that
# modules living one level above this notebook can be imported below.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    # Appended (not prepended): existing sys.path entries keep priority.
    sys.path.append(nb_dir)
|
|
|
|
|
|
|
|
# %%
|
|
|
|
from config.models import AppCategories, Participant
|
|
|
|
from setup import db_engine, session
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# Build a query over AppCategories; nothing is fetched until the
# statement is handed to pandas below.
query_app_categories = session.query(AppCategories)

# Read the query result into a DataFrame. The connection is scoped to this
# single read by the `with` block.
with db_engine.connect() as connection:
    df_app_categories = pd.read_sql(
        sql=query_app_categories.statement,
        con=connection,
    )
|
2021-08-09 16:02:23 +02:00
|
|
|
|
|
|
|
# %%
|
|
|
|
# Preview the first rows of the loaded table (shown as the cell output).
df_app_categories.head()
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# Count how many rows fall under each distinct "play_store_genre" value.
df_app_categories["play_store_genre"].value_counts()
|
|
|
|
|
|
|
|
# %%
|
2021-08-11 15:04:27 +02:00
|
|
|
# Keep only the rows whose "play_store_genre" is the literal "not_found".
df_category_not_found = df_app_categories.loc[
    df_app_categories["play_store_genre"].eq("not_found")
]
|
2021-08-09 16:02:23 +02:00
|
|
|
|
|
|
|
# %%
|
|
|
|
# Count the distinct "play_store_response" values among the "not_found" rows.
df_category_not_found["play_store_response"].value_counts()
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# Count how often each "package_name" occurs among the "not_found" rows.
df_category_not_found["package_name"].value_counts()
|
|
|
|
|
|
|
|
# %%
|
2021-08-11 15:04:27 +02:00
|
|
|
# Local import: `re` is only needed here, to escape the fragments below.
import re

# Name fragments searched for (case-insensitively) in "package_name".
# NOTE(review): presumably these identify packages shipped by the device
# manufacturer, a custom ROM, or the OS itself - confirm with the analysis.
manufacturers = [
    "samsung",
    "oneplus",
    "huawei",
    "xiaomi",
    "lge",
    "motorola",
    "miui",
    "lenovo",
    "oppo",
    "mediatek",
]
custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
other = ["android", "wssyncmldm"]

# Escape every fragment so it is matched literally: unescaped, the "." in
# "foundation.e" is a regex wildcard and would match any character.
os_manufacturer_pattern = "|".join(
    re.escape(fragment) for fragment in manufacturers + custom_rom + other
)

# Boolean mask over df_category_not_found: True where the package name
# contains any fragment. `na=False` keeps the mask strictly boolean when a
# package name is missing, so that `~rows_os_manufacturer` stays valid.
rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
    os_manufacturer_pattern, case=False, na=False
)
|
2021-08-09 16:02:23 +02:00
|
|
|
|
|
|
|
# %%
|
2021-08-11 15:04:27 +02:00
|
|
|
# Show every "not_found" row that the rows_os_manufacturer mask did NOT flag.
# The row/column display limits are lifted only inside this `with` block.
with pd.option_context(
    "display.max_rows",
    None,
    "display.max_columns",
    None,
):
    display(df_category_not_found.loc[~rows_os_manufacturer])
|