Compare commits
7 commits: 4485c4c95e ... e5cc02501f
Author | SHA1 | Date
---|---|---
junos | e5cc02501f |
junos | 352598f3da |
junos | 15653b6e70 |
junos | a66a7d0cc3 |
junos | 70cada8bb8 |
junos | d2ed73dccf |
junos | 6f451e05ac |
@@ -31,6 +31,7 @@ TIMEZONE:
   SINGLE:
     TZCODE: Europe/Ljubljana
   MULTIPLE:
+    TZ_FILE: data/external/timezone.csv
     TZCODES_FILE: data/external/multiple_timezones.csv
     IF_MISSING_TZCODE: USE_DEFAULT
     DEFAULT_TZCODE: Europe/Ljubljana
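The added TZ_FILE key feeds the prepare_tzcodes_file rule further down in this compare, which generates TZCODES_FILE, the per-device timezone table that MULTIPLE mode reads. As a sketch of what consuming that table looks like; the device_id/tzcode/timestamp columns follow RAPIDS' documented format for this file and are an assumption, not part of this diff:

```python
# Sketch: peek at the per-device timezone table MULTIPLE mode points to.
# Expected columns (device_id, tzcode, timestamp) are assumed from the
# RAPIDS docs; they are not shown anywhere in this compare.
import pandas as pd

tzcodes = pd.read_csv("data/external/multiple_timezones.csv")
print(tzcodes.head())
```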
@@ -110,6 +111,7 @@ PHONE_APPLICATIONS_FOREGROUND:
   APPLICATION_CATEGORIES:
     CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scraped from the Play Store)
     CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
+    PACKAGE_NAMES_HASHED: True
     UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE; if CATALOGUE_SOURCE is equal to GOOGLE, all scraped genres will be saved to CATALOGUE_FILE
     SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
   PROVIDERS:
@@ -0,0 +1,33 @@
+Warning: 1241 parsing failures.
+row           col expected   actual file
+  1 is_system_app an integer TRUE   'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
+  2 is_system_app an integer FALSE  'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
+  3 is_system_app an integer TRUE   'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
+  4 is_system_app an integer TRUE   'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
+  5 is_system_app an integer TRUE   'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
+... ............. .......... ...... ...............................................................................
+See problems(...) for more details.
+
+Warning message:
+The following named parsers don't match the column names: application_name
+Error: Problem with `filter()` input `..1`.
+✖ object 'application_name' not found
+ℹ Input `..1` is `!is.na(application_name)`.
+Backtrace:
+     █
+  1. ├─`%>%`(...)
+  2. ├─dplyr::mutate(...)
+  3. ├─utils::head(., -1)
+  4. ├─dplyr::select(., -c("timestamp"))
+  5. ├─dplyr::filter(., !is.na(application_name))
+  6. ├─dplyr:::filter.data.frame(., !is.na(application_name))
+  7. │ └─dplyr:::filter_rows(.data, ...)
+  8. │ ├─base::withCallingHandlers(...)
+  9. │ └─mask$eval_all_filter(dots, env_filter)
+ 10. └─base::.handleSimpleError(...)
+ 11.   └─dplyr:::h(simpleError(msg, call))
+Execution halted
+
+[Mon Dec 13 17:19:06 2021]
+Error in rule app_episodes:
+    jobid: 54
+    output: data/interim/p011/phone_app_episodes.csv
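Both warnings in this log have the same root cause: the readr column specification parses is_system_app as an integer while the raw export stores TRUE/FALSE, and application_name does not exist yet because the app_add_name.py mutation added later in this compare had not run. A hypothetical pandas pre-flight check on the raw file surfaces both:

```python
# Hypothetical pre-flight check on the raw CSV named in the log above.
import pandas as pd

raw = pd.read_csv("data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv")
print(raw["is_system_app"].dtype)         # bool (TRUE/FALSE), not the integers the R spec expects
print("application_name" in raw.columns)  # False until app_add_name.py has run
```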
@@ -0,0 +1,5 @@
+Warning message:
+In barnett_daily_features(snakemake) :
+  Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:
+    Location data rows within a daily time segment: 0
+    Location data time span in days: 398.6
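Barnett's features require at least one complete 00:00:00 to 23:59:59 day of location data inside a daily segment; the warning reports zero qualifying rows despite a 398.6-day span. A rough sketch for locating the gap, where both the file path and the local_date_time column name are assumptions about the intermediate RAPIDS output:

```python
# Sketch: count location rows per calendar day to see where full-day
# coverage breaks down. Path and column name are assumed, not from the diff.
import pandas as pd

loc = pd.read_csv("data/interim/p011/phone_locations_processed_with_datetime.csv",
                  parse_dates=["local_date_time"])
print(loc.groupby(loc["local_date_time"].dt.date).size().describe())
```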
@@ -114,7 +114,16 @@ def input_tzcodes_file(wilcards):
         if not config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"].lower().endswith(".csv"):
             raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, instead you typed: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
         if not Path(config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]).exists():
-            raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, the file in the path you typed does not exist: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
+            try:
+                config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
+            except KeyError:
+                raise ValueError("To create TZCODES_FILE, a list of timezones should be created " +
+                                 "with the rule preprocessing.smk/prepare_tzcodes_file, " +
+                                 "which will create a file specified as config['TIMEZONE']['MULTIPLE']['TZ_FILE'].\n" +
+                                 "An alternative is to provide the file manually: " +
+                                 "[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, " +
+                                 "but the file in the path you typed does not exist: " +
+                                 config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
         return [config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]]
     return []
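The try/except above only probes whether the TZ_FILE key exists before falling through to the return. For comparison, a sketch of the same guard with dict.get(), shown with a stand-in config rather than as a proposed change:

```python
# Equivalent existence check via dict.get(); `config` is a stand-in for
# Snakemake's config object, and the error message is abbreviated.
config = {"TIMEZONE": {"MULTIPLE": {"TZCODES_FILE": "data/external/multiple_timezones.csv"}}}

tz_file = config["TIMEZONE"]["MULTIPLE"].get("TZ_FILE")
if tz_file is None:
    raise ValueError("TZ_FILE is not set: run preprocessing.smk/prepare_tzcodes_file "
                     "or provide [TIMEZONE][MULTIPLE][TZCODES_FILE] manually.")
```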
@@ -9,13 +9,13 @@ rule query_usernames_device_empatica_ids:
         baseline_folder = "/mnt/e/STRAWbaseline/"
     output:
         usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
-        timezone_file = "data/external/timezone.csv"
+        timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
     script:
         "../../participants/prepare_usernames_file.py"

 rule prepare_tzcodes_file:
     input:
-        timezone_file = "data/external/timezone.csv"
+        timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
     output:
         tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
     script:
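The script behind prepare_tzcodes_file sits outside this hunk, but its contract is visible from the rule: read TZ_FILE, write TZCODES_FILE. A hypothetical sketch of such a script, runnable only under Snakemake, with the pass-through transformation invented for illustration:

```python
# Hypothetical prepare_tzcodes script: read the per-user timezone table and
# write it out in the TZCODES_FILE layout. The real script is not shown in
# this compare; the pass-through below is invented. `snakemake` is injected
# by Snakemake when the rule runs.
import pandas as pd

timezones = pd.read_csv(snakemake.input["timezone_file"])
timezones.to_csv(snakemake.output["tzcodes_file"], index=False)
```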
@@ -29,6 +29,7 @@ get_genre <- function(apps){
 apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
 genre_catalogue <- data.frame()
 catalogue_source <- snakemake@params[["catalogue_source"]]
+package_names_hashed <- snakemake@params[["package_names_hashed"]]
 update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
 scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
 apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1, nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
@@ -38,7 +39,11 @@ if(nrow(apps) > 0){
     apps_with_genre <- apps %>% mutate(genre = NA_character_)
   } else if(catalogue_source == "FILE"){
     genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
-    apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
+    if (package_names_hashed) {
+      apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
+    } else {
+      apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
+    }
   }

 if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
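The new branch keys the catalogue join on package_hash whenever PACKAGE_NAMES_HASHED is set, falling back to package_name otherwise. The same choice restated as a self-contained pandas sketch with toy data:

```python
# The join-key choice from the R hunk above, restated in pandas with toy data.
import pandas as pd

apps = pd.DataFrame({"package_hash": ["a1b2"], "package_name": [None]})
genre_catalogue = pd.DataFrame({"package_hash": ["a1b2"], "genre": ["tools"]})
package_names_hashed = True

key = "package_hash" if package_names_hashed else "package_name"
print(apps.merge(genre_catalogue, how="left", on=key))
```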
@@ -67,10 +67,12 @@ PHONE_APPLICATIONS_FOREGROUND:
       TIMESTAMP: timestamp
       DEVICE_ID: device_id
       PACKAGE_NAME: package_hash
       APPLICATION_NAME: FLAG_TO_MUTATE
       IS_SYSTEM_APP: is_system_app
     MUTATION:
       COLUMN_MAPPINGS:
-      SCRIPTS: # List any python or r scripts that mutate your raw data
+      SCRIPTS:
+        - src/data/streams/mutations/phone/straw/app_add_name.py

 PHONE_APPLICATIONS_NOTIFICATIONS:
   ANDROID:
@@ -78,11 +80,13 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
       TIMESTAMP: timestamp
       DEVICE_ID: device_id
       PACKAGE_NAME: package_hash
       APPLICATION_NAME: FLAG_TO_MUTATE
       SOUND: sound
       VIBRATE: vibrate
     MUTATION:
       COLUMN_MAPPINGS:
-      SCRIPTS: # List any python or r scripts that mutate your raw data
+      SCRIPTS:
+        - src/data/streams/mutations/phone/straw/app_add_name.py

 PHONE_BATTERY:
   ANDROID:
@@ -0,0 +1,5 @@
+import pandas as pd
+
+def main(data, stream_parameters):
+    data["application_name"] = "hashed"
+    return(data)
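This mutation fills the APPLICATION_NAME column flagged FLAG_TO_MUTATE above with the constant "hashed", so downstream filters such as !is.na(application_name) keep every row instead of erroring on a missing column. A standalone check of that behaviour:

```python
# Standalone check of the mutation above: every row gets a non-null
# application_name, so filtering on missing names no longer drops rows.
import pandas as pd

def main(data, stream_parameters):
    data["application_name"] = "hashed"
    return data

df = main(pd.DataFrame({"package_hash": ["a1b2", "c3d4"]}), stream_parameters=None)
print(df["application_name"].isna().sum())  # 0
```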
@@ -27,12 +27,14 @@ PHONE_APPLICATIONS_FOREGROUND:
     - TIMESTAMP
     - DEVICE_ID
     - PACKAGE_NAME
+    - APPLICATION_NAME
     - IS_SYSTEM_APP

 PHONE_APPLICATIONS_NOTIFICATIONS:
     - TIMESTAMP
     - DEVICE_ID
     - PACKAGE_NAME
+    - APPLICATION_NAME
     - SOUND
     - VIBRATE
@@ -38,6 +38,7 @@ def getDataForPlot(phone_data_yield_per_segment):
     for columns in columns_for_full_index:
         full_index = full_index + columns
     full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
+    phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"], keep="first")
     phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)

     # transpose the dataframe per local start datetime of the segment and discard the useless index layer
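The inserted drop_duplicates matters because pandas refuses to reindex an axis that contains duplicate labels; deduplicating on the two index columns first makes the subsequent reindex safe. A minimal reproduction of the failure mode and the fix:

```python
# Minimal reproduction: reindex raises on duplicate index labels, so
# duplicates are dropped first, mirroring the fix above.
import pandas as pd

df = pd.DataFrame({"k": [1, 1, 2], "v": [10, 11, 20]})
full = pd.Index([1, 2, 3], name="k")

df = df.drop_duplicates(subset=["k"], keep="first")
print(df.set_index("k").reindex(full).reset_index().fillna(0))
```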
@@ -22,7 +22,7 @@ output:
 </style>

 ```{r include=FALSE}
-source("renv/activate.R")
+source("/mnt/c/Users/junos/Documents/FWO-ARRS/Analysis/straw2analysis/rapids/renv/activate.R")
 ```