Bring back application_name.
This column still needs to be in the data, so add it in app_add_name.py. Later, join categories by package hash.
labels
parent
4485c4c95e
commit
6f451e05ac
|
@ -29,6 +29,7 @@ get_genre <- function(apps){
|
|||
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
||||
genre_catalogue <- data.frame()
|
||||
catalogue_source <- snakemake@params[["catalogue_source"]]
|
||||
package_names_hashed <- snakemake@params[["package_names_hashed"]]
|
||||
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
||||
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
||||
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
||||
|
@ -38,8 +39,12 @@ if(nrow(apps) > 0){
|
|||
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
||||
} else if(catalogue_source == "FILE"){
|
||||
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
||||
if (package_names_hashed) {
|
||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
|
||||
} else {
|
||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
||||
}
|
||||
}
|
||||
|
||||
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
||||
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
||||
|
|
|
@ -67,10 +67,12 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
TIMESTAMP: timestamp
|
||||
DEVICE_ID: device_id
|
||||
PACKAGE_NAME: package_hash
|
||||
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||
IS_SYSTEM_APP: is_system_app
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
SCRIPTS:
|
||||
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||
|
||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||
ANDROID:
|
||||
|
@ -78,11 +80,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
|
|||
TIMESTAMP: timestamp
|
||||
DEVICE_ID: device_id
|
||||
PACKAGE_NAME: package_hash
|
||||
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||
SOUND: sound
|
||||
VIBRATE: vibrate
|
||||
MUTATION:
|
||||
COLUMN_MAPPINGS:
|
||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||
SCRIPTS:
|
||||
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||
|
||||
PHONE_BATTERY:
|
||||
ANDROID:
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
import pandas as pd
|
||||
|
||||
def main(data, stream_parameters):
|
||||
data["application_name"] = "hashed"
|
||||
return(data)
|
|
@ -27,12 +27,14 @@ PHONE_APPLICATIONS_FOREGROUND:
|
|||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
- PACKAGE_NAME
|
||||
- APPLICATION_NAME
|
||||
- IS_SYSTEM_APP
|
||||
|
||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||
- TIMESTAMP
|
||||
- DEVICE_ID
|
||||
- PACKAGE_NAME
|
||||
- APPLICATION_NAME
|
||||
- SOUND
|
||||
- VIBRATE
|
||||
|
||||
|
|
Loading…
Reference in New Issue