Bring back application_name.

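The application_name column still needs to be present in the data, so app_add_name.py adds it back with the placeholder value "hashed".
When package names are hashed, genre categories are joined by package hash instead of package name.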
labels
junos 2021-12-15 12:58:02 +01:00
parent 4485c4c95e
commit 6f451e05ac
4 changed files with 19 additions and 3 deletions

View File

@ -29,6 +29,7 @@ get_genre <- function(apps){
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F) apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
genre_catalogue <- data.frame() genre_catalogue <- data.frame()
catalogue_source <- snakemake@params[["catalogue_source"]] catalogue_source <- snakemake@params[["catalogue_source"]]
package_names_hashed <- snakemake@params[["package_names_hashed"]]
update_catalogue_file <- snakemake@params[["update_catalogue_file"]] update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]] scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre")))) apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
@ -38,8 +39,12 @@ if(nrow(apps) > 0){
apps_with_genre <- apps %>% mutate(genre = NA_character_) apps_with_genre <- apps %>% mutate(genre = NA_character_)
} else if(catalogue_source == "FILE"){ } else if(catalogue_source == "FILE"){
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character")) genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
if (package_names_hashed) {
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
} else {
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name") apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
} }
}
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){ if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name

View File

@ -67,10 +67,12 @@ PHONE_APPLICATIONS_FOREGROUND:
TIMESTAMP: timestamp TIMESTAMP: timestamp
DEVICE_ID: device_id DEVICE_ID: device_id
PACKAGE_NAME: package_hash PACKAGE_NAME: package_hash
APPLICATION_NAME: FLAG_TO_MUTATE
IS_SYSTEM_APP: is_system_app IS_SYSTEM_APP: is_system_app
MUTATION: MUTATION:
COLUMN_MAPPINGS: COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS:
- src/data/streams/mutations/phone/straw/app_add_name.py
PHONE_APPLICATIONS_NOTIFICATIONS: PHONE_APPLICATIONS_NOTIFICATIONS:
ANDROID: ANDROID:
@ -78,11 +80,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
TIMESTAMP: timestamp TIMESTAMP: timestamp
DEVICE_ID: device_id DEVICE_ID: device_id
PACKAGE_NAME: package_hash PACKAGE_NAME: package_hash
APPLICATION_NAME: FLAG_TO_MUTATE
SOUND: sound SOUND: sound
VIBRATE: vibrate VIBRATE: vibrate
MUTATION: MUTATION:
COLUMN_MAPPINGS: COLUMN_MAPPINGS:
SCRIPTS: # List any python or r scripts that mutate your raw data SCRIPTS:
- src/data/streams/mutations/phone/straw/app_add_name.py
PHONE_BATTERY: PHONE_BATTERY:
ANDROID: ANDROID:

View File

@ -0,0 +1,5 @@
import pandas as pd
def main(data, stream_parameters):
    """Add a placeholder ``application_name`` column to the stream data.

    Every row gets the constant string ``"hashed"`` as its
    ``application_name``. The column is written onto ``data`` itself
    (the input object is modified in place) and that same object is returned.

    Args:
        data: Table of raw stream rows; presumably a pandas DataFrame
            (column assignment by label is the only operation used) --
            confirm against the caller.
        stream_parameters: Accepted for the mutation-script call signature;
            not read by this function.

    Returns:
        The same ``data`` object, now carrying an ``application_name`` column.
    """
    # A scalar assignment broadcasts the placeholder to every row and also
    # works on an empty table (it just creates the empty column).
    data["application_name"] = "hashed"
    return data

View File

@ -27,12 +27,14 @@ PHONE_APPLICATIONS_FOREGROUND:
- TIMESTAMP - TIMESTAMP
- DEVICE_ID - DEVICE_ID
- PACKAGE_NAME - PACKAGE_NAME
- APPLICATION_NAME
- IS_SYSTEM_APP - IS_SYSTEM_APP
PHONE_APPLICATIONS_NOTIFICATIONS: PHONE_APPLICATIONS_NOTIFICATIONS:
- TIMESTAMP - TIMESTAMP
- DEVICE_ID - DEVICE_ID
- PACKAGE_NAME - PACKAGE_NAME
- APPLICATION_NAME
- SOUND - SOUND
- VIBRATE - VIBRATE