From d6eea0fc00c2cd77b8c131f64bc05ddd35aafcd7 Mon Sep 17 00:00:00 2001
From: junos
Date: Wed, 19 Apr 2023 10:54:46 +0200
Subject: [PATCH] Completely classify unknown applications.

---
Review note: the manual rules now apply only to rows whose play_store_genre
is "not_found". Hunk offsets and the diffstat were recomputed for the added
lines; the post-image blob id on the "index" line was left unchanged.

 presentation/ApplicationCategories.R | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/presentation/ApplicationCategories.R b/presentation/ApplicationCategories.R
index 3e47b23..ca44067 100644
--- a/presentation/ApplicationCategories.R
+++ b/presentation/ApplicationCategories.R
@@ -71,6 +71,41 @@ rows_os_manufacturer <- grepl(grep_pattern, df_app_categories$package_name)
 df_app_categories[!rows_os_manufacturer, ] %>%
   filter(play_store_genre == "not_found")
 
+# Manually classify apps whose play_store_genre is "not_found".
+# case_when() uses the first matching rule, so the order below matters.
+df_app_categories %<>%
+  mutate(
+    play_store_genre =
+      case_when(
+        # Keep genres that were found; without this guard, broad patterns
+        # such as "qr" or "shot" would overwrite already known genres.
+        play_store_genre != "not_found" ~ play_store_genre,
+        str_detect(str_to_lower(package_name), grep_pattern) ~ "System",
+        str_detect(str_to_lower(package_name), "straw") ~ "STRAW",
+        str_detect(str_to_lower(package_name), "chromium") ~ "Communication", # Same as chrome.
+        str_detect(str_to_lower(package_name), "skype") ~ "Communication", # Skype Lite not classified.
+        str_detect(str_to_lower(package_name), "imsservice") ~ "Communication", # IP Multimedia Subsystem
+        str_detect(str_to_lower(package_name), paste(c("covid", "empatica"), collapse = "|")) ~ "Medical",
+        str_detect(str_to_lower(package_name), paste(c("libri", "tachiyomi"), collapse = "|")) ~ "Books & Reference",
+        str_detect(str_to_lower(package_name), paste(c("bricks", "chess"), collapse = "|")) ~ "Casual",
+        str_detect(str_to_lower(package_name), "weather") ~ "Weather",
+        str_detect(str_to_lower(package_name), "excel") ~ "Productivity",
+        str_detect(str_to_lower(package_name), paste(c("qr", "barcode", "archimedes", "mixplorer", "winrar", "filemanager", "shot", "faceunlock", "signin", "milink"), collapse = "|")) ~ "Tools",
+        str_detect(str_to_lower(package_name), "stupeflix") ~ "Photography",
+        str_detect(str_to_lower(package_name), "anyme") ~ "Entertainment",
+        str_detect(str_to_lower(package_name), "vanced") ~ "Video Players & Editors",
+        str_detect(str_to_lower(package_name), paste(c("music", "radio", "dolby"), collapse = "|")) ~ "Music & Audio",
+        str_detect(str_to_lower(package_name), paste(c("tensorflow", "object_detection"), collapse = "|")) ~ "Education",
+        .default = play_store_genre
+      )
+  )
+
+# Explore what remains after classifying above.
+df_app_categories %>%
+  filter(play_store_genre == "not_found")
+
+# After this, 13 applications remain, which I will classify as "Other".
+
 # Correct some mistakes
 # And classify 'not_found'
 df_app_categories %<>%
@@ -80,7 +115,7 @@ df_app_categories %<>%
         case_when(
           x == "Education,Education" ~ "Education",
           x == "EducationEducation" ~ "Education",
-          x == "not_found" ~ "System",
+          x == "not_found" ~ "Other",
           .default = x
         )
       }