Start to better classify system apps.
parent
0e66a5a963
commit
711b451eff
|
@ -34,7 +34,45 @@ df_app_categories <- tbl(con, "app_categories") %>%
|
|||
# Preview the first rows, then tabulate how often each genre value occurs.
head(x = df_app_categories)
table(df_app_categories[["play_store_genre"]])
|
||||
|
||||
# Tally the values of play_store_response among the rows whose genre is
# "not_found". count() does the grouping internally, so the result is not
# left grouped the way group_by() %>% count() would leave it.
df_app_categories %>%
  filter(play_store_genre == "not_found") %>%
  count(play_store_response)
|
||||
# All "not_found" rows have an HTTP status of 404.
|
||||
|
||||
# Count how often each package name occurs among the "not_found" rows,
# most frequent first, to check for duplicates. sort = TRUE orders by
# descending n, replacing the separate arrange(desc(n)) step, and the
# result is not left grouped.
df_app_categories %>%
  filter(play_store_genre == "not_found") %>%
  count(package_name, sort = TRUE)
|
||||
# All "not_found" apps are unique.
|
||||
|
||||
# Exclude phone manufacturers, custom ROM names and similar.
|
||||
# Regular-expression fragments that flag a package name as belonging to a
# phone manufacturer, a custom ROM or similar. They are joined into a single
# alternation below, so every entry is interpreted as a regex: literal dots
# must be escaped.
manufacturers <- c(
  "samsung",
  "oneplus",
  "huawei",
  "xiaomi",
  "lge",
  "motorola",
  "miui",
  "lenovo",
  "oppo",
  "mediatek"
)
custom_rom <- c(
  "coloros",
  "lineageos",
  "myos",
  "cyanogenmod",
  "foundation\\.e" # escaped: a bare "." would match any character
)
other <- c("android", "wssyncmldm")

# One pattern that matches any of the fragments anywhere in a string.
# NOTE(review): fragments are unanchored substrings, so short ones such as
# "lge" or "oppo" can also hit unrelated package names -- confirm this is
# acceptable for the exploration below.
grep_pattern <- paste(c(manufacturers, custom_rom, other), collapse = "|")
|
||||
|
||||
# Logical vector with one entry per row: TRUE where package_name matches
# the combined pattern.
rows_os_manufacturer <- grepl(
  pattern = grep_pattern,
  x = df_app_categories$package_name
)
|
||||
|
||||
# Explore what remains after excluding the above.
|
||||
# Show the "not_found" rows that were not flagged by rows_os_manufacturer.
df_app_categories %>%
  filter(!rows_os_manufacturer, play_store_genre == "not_found")
|
||||
|
||||
# Correct some mistakes
|
||||
# And classify 'not_found'
|
||||
df_app_categories %<>%
|
||||
mutate(
|
||||
play_store_genre = {
|
||||
|
@ -51,7 +89,8 @@ df_app_categories %<>%
|
|||
select(-package_name) %>%
|
||||
rename(
|
||||
genre = play_store_genre,
|
||||
package_name = package_hash)
|
||||
package_name = package_hash
|
||||
)
|
||||
|
||||
# Tabulate how often each value of the genre column occurs.
table(df_app_categories[["genre"]])
|
||||
|
||||
|
|
Loading…
Reference in New Issue