Start to better classify system apps.
parent
0e66a5a963
commit
711b451eff
|
@ -34,7 +34,45 @@ df_app_categories <- tbl(con, "app_categories") %>%
|
||||||
head(df_app_categories)
|
head(df_app_categories)
|
||||||
table(df_app_categories$play_store_genre)
|
table(df_app_categories$play_store_genre)
|
||||||
|
|
||||||
|
df_app_categories %>%
|
||||||
|
filter(play_store_genre == "not_found") %>%
|
||||||
|
group_by(play_store_response) %>%
|
||||||
|
count()
|
||||||
|
# All "not_found" have an HTTP status of 404.
|
||||||
|
|
||||||
|
df_app_categories %>%
|
||||||
|
filter(play_store_genre == "not_found") %>%
|
||||||
|
group_by(package_name) %>%
|
||||||
|
count() %>%
|
||||||
|
arrange(desc(n))
|
||||||
|
# Every "not_found" package name occurs exactly once (no duplicates).
|
||||||
|
|
||||||
|
# Exclude phone manufacturers, custom ROM names and similar.
#
# Each keyword below is matched as a literal substring of the package name.
manufacturers <- c(
  "samsung",
  "oneplus",
  "huawei",
  "xiaomi",
  "lge",
  "motorola",
  "miui",
  "lenovo",
  "oppo",
  "mediatek"
)
custom_rom <- c("coloros", "lineageos", "myos", "cyanogenmod", "foundation.e")
other <- c("android", "wssyncmldm")

# Escape regex metacharacters so keywords such as "foundation.e" match a
# literal dot; unescaped, "." would match any single character.
exclusion_keywords <- c(manufacturers, custom_rom, other)
escaped_keywords <- gsub(
  "([.\\\\|()\\[\\]{}^$*+?])",
  "\\\\\\1",
  exclusion_keywords,
  perl = TRUE
)

# One alternation pattern: a package name matches if it contains any keyword.
grep_pattern <- paste(escaped_keywords, collapse = "|")
|
||||||
|
|
||||||
|
rows_os_manufacturer <- grepl(grep_pattern, df_app_categories$package_name)
|
||||||
|
|
||||||
|
# Explore what remains after excluding the above.
|
||||||
|
df_app_categories[!rows_os_manufacturer, ] %>%
|
||||||
|
filter(play_store_genre == "not_found")
|
||||||
|
|
||||||
# Correct some mistakes
|
# Correct some mistakes
|
||||||
|
# And classify 'not_found'
|
||||||
df_app_categories %<>%
|
df_app_categories %<>%
|
||||||
mutate(
|
mutate(
|
||||||
play_store_genre = {
|
play_store_genre = {
|
||||||
|
@ -48,16 +86,17 @@ df_app_categories %<>%
|
||||||
}
|
}
|
||||||
}(play_store_genre)
|
}(play_store_genre)
|
||||||
) %>%
|
) %>%
|
||||||
select(-package_name) %>%
|
select(-package_name) %>%
|
||||||
rename(
|
rename(
|
||||||
genre = play_store_genre,
|
genre = play_store_genre,
|
||||||
package_name = package_hash)
|
package_name = package_hash
|
||||||
|
)
|
||||||
|
|
||||||
table(df_app_categories$genre)
|
table(df_app_categories$genre)
|
||||||
|
|
||||||
df_app_categories %>%
|
df_app_categories %>%
|
||||||
group_by(genre) %>%
|
group_by(genre) %>%
|
||||||
count() %>%
|
count() %>%
|
||||||
arrange(desc(n)) %>%
|
arrange(desc(n)) %>%
|
||||||
write_csv("play_store_categories_count.csv")
|
write_csv("play_store_categories_count.csv")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue