# Export the Play Store genre catalogue of applications.
#
# Reads the "app_categories" table from the PostgreSQL database, cleans up a
# few known mislabelled genres, and writes two CSV files to the working
# directory:
#   * play_store_categories_count.csv           - number of apps per genre
#   * play_store_application_genre_catalogue.csv - package name -> genre

library(conflicted)
library(yaml)
# NOTE(review): the connection below uses RPostgres, not RPostgreSQL. This
# attach is kept because the unqualified DBI generics used further down
# (dbExistsTable, dbDisconnect) presumably come into scope through it - confirm
# before removing.
library(RPostgreSQL)
library(tidyverse)
conflicts_prefer(
  dplyr::filter,
  dplyr::lag
)
library(magrittr)

# Database connection ----

# Read the password from file.
credentials <- yaml.load_file("../rapids/credentials.yaml")
pw <- credentials$PSQL_STRAW$password

# Load the PostgreSQL driver.
drv <- RPostgres::Postgres()

# Create a connection to the Postgres database.
# NOTE(review): the credentials key is PSQL_STRAW while dbname/user are spelled
# "staw" - verify this is intentional.
con <- RPostgres::dbConnect(
  drv,
  dbname = "staw",
  host = "eol.ijs.si",
  port = 5432,
  user = "staw_db",
  password = pw
)

# Remove the password from the workspace as soon as it is no longer needed.
rm(pw, credentials)

# Fetch data ----

# Pull the whole app_categories table into memory. The connection is only
# needed for this step, so it is closed in `finally`, which also runs when the
# table is missing or the query fails.
df_app_categories <- tryCatch(
  {
    if (!dbExistsTable(con, "app_categories")) {
      stop(
        "Table \"app_categories\" was not found in the database.",
        call. = FALSE
      )
    }
    tbl(con, "app_categories") %>%
      collect()
  },
  finally = dbDisconnect(con)
)

head(df_app_categories)
table(df_app_categories$play_store_genre)

# Clean up ----

# Correct some mistakes in the genre labels, drop the plain-text package name,
# and expose the hashed one under the name `package_name` instead.
df_app_categories <- df_app_categories %>%
  mutate(
    play_store_genre = case_when(
      play_store_genre %in%
        c("Education,Education", "EducationEducation") ~ "Education",
      play_store_genre == "not_found" ~ "System",
      .default = play_store_genre
    )
  ) %>%
  select(-package_name) %>%
  rename(
    genre = play_store_genre,
    package_name = package_hash
  )

table(df_app_categories$genre)

# Export ----

# Number of applications per genre, most frequent genre first.
df_app_categories %>%
  count(genre, sort = TRUE) %>%
  write_csv("play_store_categories_count.csv")

# Catalogue mapping each (hashed) package name to its genre.
write_csv(
  x = select(df_app_categories, c(package_name, genre)),
  file = "play_store_application_genre_catalogue.csv"
)