Bring back application_name.
This column still needs to be in the data, so add it in app_add_name.py. Later, join categories by package hash.
labels
parent
4485c4c95e
commit
6f451e05ac
|
@ -29,6 +29,7 @@ get_genre <- function(apps){
|
||||||
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
||||||
genre_catalogue <- data.frame()
|
genre_catalogue <- data.frame()
|
||||||
catalogue_source <- snakemake@params[["catalogue_source"]]
|
catalogue_source <- snakemake@params[["catalogue_source"]]
|
||||||
|
package_names_hashed <- snakemake@params[["package_names_hashed"]]
|
||||||
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
||||||
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
||||||
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
||||||
|
@ -38,8 +39,12 @@ if(nrow(apps) > 0){
|
||||||
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
||||||
} else if(catalogue_source == "FILE"){
|
} else if(catalogue_source == "FILE"){
|
||||||
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
||||||
|
if (package_names_hashed) {
|
||||||
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
|
||||||
|
} else {
|
||||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
||||||
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
||||||
|
|
|
@ -67,10 +67,12 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
|
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||||
IS_SYSTEM_APP: is_system_app
|
IS_SYSTEM_APP: is_system_app
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS:
|
||||||
|
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
@ -78,11 +80,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
|
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||||
SOUND: sound
|
SOUND: sound
|
||||||
VIBRATE: vibrate
|
VIBRATE: vibrate
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS:
|
||||||
|
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||||
|
|
||||||
PHONE_BATTERY:
|
PHONE_BATTERY:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def main(data, stream_parameters):
|
||||||
|
data["application_name"] = "hashed"
|
||||||
|
return(data)
|
|
@ -27,12 +27,14 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
|
- APPLICATION_NAME
|
||||||
- IS_SYSTEM_APP
|
- IS_SYSTEM_APP
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
|
- APPLICATION_NAME
|
||||||
- SOUND
|
- SOUND
|
||||||
- VIBRATE
|
- VIBRATE
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue