Compare commits
7 Commits
4485c4c95e
...
e5cc02501f
Author | SHA1 | Date |
---|---|---|
junos | e5cc02501f | |
junos | 352598f3da | |
junos | 15653b6e70 | |
junos | a66a7d0cc3 | |
junos | 70cada8bb8 | |
junos | d2ed73dccf | |
junos | 6f451e05ac |
|
@ -31,6 +31,7 @@ TIMEZONE:
|
||||||
SINGLE:
|
SINGLE:
|
||||||
TZCODE: Europe/Ljubljana
|
TZCODE: Europe/Ljubljana
|
||||||
MULTIPLE:
|
MULTIPLE:
|
||||||
|
TZ_FILE: data/external/timezone.csv
|
||||||
TZCODES_FILE: data/external/multiple_timezones.csv
|
TZCODES_FILE: data/external/multiple_timezones.csv
|
||||||
IF_MISSING_TZCODE: USE_DEFAULT
|
IF_MISSING_TZCODE: USE_DEFAULT
|
||||||
DEFAULT_TZCODE: Europe/Ljubljana
|
DEFAULT_TZCODE: Europe/Ljubljana
|
||||||
|
@ -110,6 +111,7 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
APPLICATION_CATEGORIES:
|
APPLICATION_CATEGORIES:
|
||||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
|
||||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||||
|
PACKAGE_NAMES_HASHED: True
|
||||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE; if CATALOGUE_SOURCE is equal to GOOGLE, all scraped genres will be saved to CATALOGUE_FILE
|
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE; if CATALOGUE_SOURCE is equal to GOOGLE, all scraped genres will be saved to CATALOGUE_FILE
|
||||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
Warning: 1241 parsing failures.
|
||||||
|
row col expected actual file
|
||||||
|
1 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
||||||
|
2 is_system_app an integer FALSE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
||||||
|
3 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
||||||
|
4 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
||||||
|
5 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
||||||
|
... ............. .......... ...... ...............................................................................
|
||||||
|
See problems(...) for more details.
|
||||||
|
|
||||||
|
Warning message:
|
||||||
|
The following named parsers don't match the column names: application_name
|
||||||
|
Error: Problem with `filter()` input `..1`.
|
||||||
|
✖ object 'application_name' not found
|
||||||
|
ℹ Input `..1` is `!is.na(application_name)`.
|
||||||
|
Backtrace:
|
||||||
|
█
|
||||||
|
1. ├─`%>%`(...)
|
||||||
|
2. ├─dplyr::mutate(...)
|
||||||
|
3. ├─utils::head(., -1)
|
||||||
|
4. ├─dplyr::select(., -c("timestamp"))
|
||||||
|
5. ├─dplyr::filter(., !is.na(application_name))
|
||||||
|
6. ├─dplyr:::filter.data.frame(., !is.na(application_name))
|
||||||
|
7. │ └─dplyr:::filter_rows(.data, ...)
|
||||||
|
8. │ ├─base::withCallingHandlers(...)
|
||||||
|
9. │ └─mask$eval_all_filter(dots, env_filter)
|
||||||
|
10. └─base::.handleSimpleError(...)
|
||||||
|
11. └─dplyr:::h(simpleError(msg, call))
|
||||||
|
Execution halted
|
||||||
|
[Mon Dec 13 17:19:06 2021]
|
||||||
|
Error in rule app_episodes:
|
||||||
|
jobid: 54
|
||||||
|
output: data/interim/p011/phone_app_episodes.csv
|
|
@ -0,0 +1,5 @@
|
||||||
|
Warning message:
|
||||||
|
In barnett_daily_features(snakemake) :
|
||||||
|
Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:
|
||||||
|
Location data rows within a daily time segment: 0
|
||||||
|
Location data time span in days: 398.6
|
|
@ -114,7 +114,16 @@ def input_tzcodes_file(wilcards):
|
||||||
if not config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"].lower().endswith(".csv"):
|
if not config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"].lower().endswith(".csv"):
|
||||||
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, instead you typed: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, instead you typed: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
||||||
if not Path(config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]).exists():
|
if not Path(config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]).exists():
|
||||||
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, the file in the path you typed does not exist: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
try:
|
||||||
|
config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
||||||
|
except KeyError:
|
||||||
|
raise ValueError("To create TZCODES_FILE, a list of timezones should be created " +
|
||||||
|
"with the rule preprocessing.smk/prepare_tzcodes_file " +
|
||||||
|
"which will create a file specified as config['TIMEZONE']['MULTIPLE']['TZ_FILE']." +
|
||||||
|
"\n An alternative is to provide the file manually:" +
|
||||||
|
"[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file," +
|
||||||
|
"but the file in the path you typed does not exist: " +
|
||||||
|
config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
||||||
return [config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]]
|
return [config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
|
@ -9,13 +9,13 @@ rule query_usernames_device_empatica_ids:
|
||||||
baseline_folder = "/mnt/e/STRAWbaseline/"
|
baseline_folder = "/mnt/e/STRAWbaseline/"
|
||||||
output:
|
output:
|
||||||
usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
||||||
timezone_file = "data/external/timezone.csv"
|
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
||||||
script:
|
script:
|
||||||
"../../participants/prepare_usernames_file.py"
|
"../../participants/prepare_usernames_file.py"
|
||||||
|
|
||||||
rule prepare_tzcodes_file:
|
rule prepare_tzcodes_file:
|
||||||
input:
|
input:
|
||||||
timezone_file = "data/external/timezone.csv"
|
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
||||||
output:
|
output:
|
||||||
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
|
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
|
||||||
script:
|
script:
|
||||||
|
|
|
@ -29,6 +29,7 @@ get_genre <- function(apps){
|
||||||
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
||||||
genre_catalogue <- data.frame()
|
genre_catalogue <- data.frame()
|
||||||
catalogue_source <- snakemake@params[["catalogue_source"]]
|
catalogue_source <- snakemake@params[["catalogue_source"]]
|
||||||
|
package_names_hashed <- snakemake@params[["package_names_hashed"]]
|
||||||
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
||||||
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
||||||
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
||||||
|
@ -38,8 +39,12 @@ if(nrow(apps) > 0){
|
||||||
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
||||||
} else if(catalogue_source == "FILE"){
|
} else if(catalogue_source == "FILE"){
|
||||||
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
||||||
|
if (package_names_hashed) {
|
||||||
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
|
||||||
|
} else {
|
||||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
||||||
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
apps_without_genre <- (apps_with_genre %>% filter(is.na(genre)) %>% distinct(package_name))$package_name
|
||||||
|
|
|
@ -67,10 +67,12 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
|
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||||
IS_SYSTEM_APP: is_system_app
|
IS_SYSTEM_APP: is_system_app
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS:
|
||||||
|
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
@ -78,11 +80,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
|
APPLICATION_NAME: FLAG_TO_MUTATE
|
||||||
SOUND: sound
|
SOUND: sound
|
||||||
VIBRATE: vibrate
|
VIBRATE: vibrate
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS:
|
||||||
|
- src/data/streams/mutations/phone/straw/app_add_name.py
|
||||||
|
|
||||||
PHONE_BATTERY:
|
PHONE_BATTERY:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def main(data, stream_parameters):
|
||||||
|
data["application_name"] = "hashed"
|
||||||
|
return(data)
|
|
@ -27,12 +27,14 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
|
- APPLICATION_NAME
|
||||||
- IS_SYSTEM_APP
|
- IS_SYSTEM_APP
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
|
- APPLICATION_NAME
|
||||||
- SOUND
|
- SOUND
|
||||||
- VIBRATE
|
- VIBRATE
|
||||||
|
|
||||||
|
|
|
@ -38,6 +38,7 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
for columns in columns_for_full_index:
|
for columns in columns_for_full_index:
|
||||||
full_index = full_index + columns
|
full_index = full_index + columns
|
||||||
full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
|
full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
|
||||||
|
phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"], keep="first")
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
|
phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
|
||||||
|
|
||||||
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
||||||
|
|
|
@ -22,7 +22,7 @@ output:
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
```{r include=FALSE}
|
```{r include=FALSE}
|
||||||
source("renv/activate.R")
|
source("/mnt/c/Users/junos/Documents/FWO-ARRS/Analysis/straw2analysis/rapids/renv/activate.R")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue