stress_at_work_analysis/exploration/expl_app_categories.py

103 lines
2.5 KiB
Python

# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.13.0
# kernelspec:
# display_name: straw2analysis
# language: python
# name: straw2analysis
# ---
# %%
# %matplotlib inline
import os
import sys
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
sys.path.append(nb_dir)
# %%
from config.models import AppCategories, Participant
from setup import db_engine, session
# %%
query_app_categories = session.query(AppCategories)
with db_engine.connect() as connection:
df_app_categories = pd.read_sql(query_app_categories.statement, connection)
# %%
df_app_categories.head()
# %%
df_app_categories["play_store_genre"].value_counts()
# %%
df_category_not_found = df_app_categories[
df_app_categories["play_store_genre"] == "not_found"
]
# %%
df_category_not_found["play_store_response"].value_counts()
# %%
df_category_not_found["package_name"].value_counts()
# %%
manufacturers = [
"samsung",
"oneplus",
"huawei",
"xiaomi",
"lge",
"motorola",
"miui",
"lenovo",
"oppo",
"mediatek",
]
custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
other = ["android", "wssyncmldm"]
rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
"|".join(manufacturers + custom_rom + other), case=False
)
# %%
with pd.option_context("display.max_rows", None, "display.max_columns", None):
display(df_category_not_found.loc[~rows_os_manufacturer])
# %% [markdown]
# # Export categories
# %% [markdown]
# Rename all of "not_found" to "system" or "other".
# %%
df_app_categories_to_export = df_app_categories.copy()
rows_os_manufacturer_full = (df_app_categories_to_export["package_name"].str.contains(
"|".join(manufacturers + custom_rom + other), case=False
)) & (df_app_categories_to_export["play_store_genre"] == "not_found")
df_app_categories_to_export.loc[rows_os_manufacturer_full, "play_store_genre"] = "System"
# %%
rows_not_found = (df_app_categories_to_export["play_store_genre"] == "not_found")
df_app_categories_to_export.loc[rows_not_found, "play_store_genre"] = "Other"
# %%
df_app_categories_to_export["play_store_genre"].value_counts()
# %%
df_app_categories_to_export.rename(columns={"play_store_genre": "genre"},inplace=True)
df_app_categories_to_export.to_csv("../data/app_categories.csv", columns=["package_hash","genre"],index=False)
# %%