Completely classify unknown applications.

master
junos 2023-04-19 10:54:46 +02:00
parent 711b451eff
commit d6eea0fc00
1 changed files with 32 additions and 1 deletions

View File

@ -71,6 +71,37 @@ rows_os_manufacturer <- grepl(grep_pattern, df_app_categories$package_name)
df_app_categories[!rows_os_manufacturer, ] %>% df_app_categories[!rows_os_manufacturer, ] %>%
filter(play_store_genre == "not_found") filter(play_store_genre == "not_found")
# Manually classify apps by keywords found in the lower-cased package name.
# Rules are tried top to bottom and the first matching rule wins; rows that
# match no rule keep their current play_store_genre.
# NOTE(review): the rules run on every row, not only on rows whose genre is
# "not_found", so an existing genre is replaced whenever a keyword matches --
# confirm that this is intended.
df_app_categories %<>%
  mutate(
    play_store_genre = {
      pkg_lower <- str_to_lower(package_name)

      # TRUE where the package name contains at least one of the keywords.
      has_any <- function(...) {
        str_detect(pkg_lower, paste(c(...), collapse = "|"))
      }

      case_when(
        str_detect(pkg_lower, grep_pattern) ~ "System",
        has_any("straw") ~ "STRAW",
        has_any("chromium") ~ "Communication", # Same as chrome.
        has_any("skype") ~ "Communication", # Skype Lite not classified.
        has_any("imsservice") ~ "Communication", # IP Multimedia Subsystem
        has_any("covid", "empatica") ~ "Medical",
        has_any("libri", "tachiyomi") ~ "Books & Reference",
        has_any("bricks", "chess") ~ "Casual",
        has_any("weather") ~ "Weather",
        has_any("excel") ~ "Productivity",
        has_any(
          "qr", "barcode", "archimedes", "mixplorer", "winrar",
          "filemanager", "shot", "faceunlock", "signin", "milink"
        ) ~ "Tools",
        has_any("stupeflix") ~ "Photography",
        has_any("anyme") ~ "Entertainment",
        has_any("vanced") ~ "Video Players & Editors",
        has_any("music", "radio", "dolby") ~ "Music & Audio",
        has_any("tensorflow", "object_detection") ~ "Education",
        .default = play_store_genre
      )
    }
  )
# Show the rows that still have genre "not_found" after the manual rules above.
filter(df_app_categories, play_store_genre == "not_found")
# After this, 13 applications remain, which I will classify as "Other".
# Correct some mistakes # Correct some mistakes
# And classify 'not_found' # And classify 'not_found'
df_app_categories %<>% df_app_categories %<>%
@ -80,7 +111,7 @@ df_app_categories %<>%
case_when( case_when(
x == "Education,Education" ~ "Education", x == "Education,Education" ~ "Education",
x == "EducationEducation" ~ "Education", x == "EducationEducation" ~ "Education",
x == "not_found" ~ "System", x == "not_found" ~ "Other",
.default = x .default = x
) )
} }