Start to better classify system apps.

master
junos 2023-04-19 09:47:43 +02:00
parent 0e66a5a963
commit 711b451eff
1 changed files with 44 additions and 5 deletions

View File

@@ -34,7 +34,45 @@ df_app_categories <- tbl(con, "app_categories") %>%
# Quick look at the first rows and at the distribution of Play Store genres.
head(df_app_categories)
table(df_app_categories$play_store_genre)
# Explore apps whose genre is "not_found" ----

# Tabulate the Play Store response recorded for the "not_found" rows.
# count() is used instead of group_by() %>% count() so the result is not
# left grouped.
df_app_categories %>%
  filter(play_store_genre == "not_found") %>%
  count(play_store_response)
# All "not_found" have an HTTP status of 404.

# Count rows per package name, most frequent first, to spot duplicates.
df_app_categories %>%
  filter(play_store_genre == "not_found") %>%
  count(package_name, sort = TRUE)
# All "not_found" apps are unique.

# Exclude phone manufacturers, custom ROM names and similar.
# Every keyword below becomes one alternative of a single regular expression
# that grepl() matches anywhere inside the package name (unanchored).
# NOTE(review): short keywords such as "lge", "oppo" or "android" can also
# match unrelated package names; confirm this is acceptable for the
# classification that follows.
manufacturers <- c(
  "samsung",
  "oneplus",
  "huawei",
  "xiaomi",
  "lge",
  "motorola",
  "miui",
  "lenovo",
  "oppo",
  "mediatek"
)

# The dot is escaped so that only the literal text "foundation.e" matches;
# an unescaped "." would match any single character.
custom_rom <- c(
  "coloros",
  "lineageos",
  "myos",
  "cyanogenmod",
  "foundation\\.e"
)

other <- c("android", "wssyncmldm")

grep_pattern <- paste(c(manufacturers, custom_rom, other), collapse = "|")

# Logical vector, one element per row: TRUE when the package name contains
# any of the keywords above.
rows_os_manufacturer <- grepl(grep_pattern, df_app_categories$package_name)

# Explore what remains after excluding above.
df_app_categories[!rows_os_manufacturer, ] %>%
  filter(play_store_genre == "not_found")
# Correct some mistakes # Correct some mistakes
# And classify 'not_found'
df_app_categories %<>% df_app_categories %<>%
mutate( mutate(
play_store_genre = { play_store_genre = {
@@ -51,7 +89,8 @@ df_app_categories %<>%
select(-package_name) %>% select(-package_name) %>%
rename( rename(
genre = play_store_genre, genre = play_store_genre,
package_name = package_hash) package_name = package_hash
)
# Distribution of the final genre labels after renaming.
table(df_app_categories$genre)