Compare commits
No commits in common. "e5cc02501f629c96641dfd1bcd1f7fcfd0d55462" and "4485c4c95e6eddad00fb6b5221d2946930394970" have entirely different histories.
e5cc02501f
...
4485c4c95e
|
@ -31,7 +31,6 @@ TIMEZONE:
|
||||||
SINGLE:
|
SINGLE:
|
||||||
TZCODE: Europe/Ljubljana
|
TZCODE: Europe/Ljubljana
|
||||||
MULTIPLE:
|
MULTIPLE:
|
||||||
TZ_FILE: data/external/timezone.csv
|
|
||||||
TZCODES_FILE: data/external/multiple_timezones.csv
|
TZCODES_FILE: data/external/multiple_timezones.csv
|
||||||
IF_MISSING_TZCODE: USE_DEFAULT
|
IF_MISSING_TZCODE: USE_DEFAULT
|
||||||
DEFAULT_TZCODE: Europe/Ljubljana
|
DEFAULT_TZCODE: Europe/Ljubljana
|
||||||
|
@ -111,7 +110,6 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
APPLICATION_CATEGORIES:
|
APPLICATION_CATEGORIES:
|
||||||
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
CATALOGUE_SOURCE: FILE # FILE (genres are read from CATALOGUE_FILE) or GOOGLE (genres are scrapped from the Play Store)
|
||||||
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
CATALOGUE_FILE: "data/external/stachl_application_genre_catalogue.csv"
|
||||||
PACKAGE_NAMES_HASHED: True
|
|
||||||
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
UPDATE_CATALOGUE_FILE: False # if CATALOGUE_SOURCE is equal to FILE, whether or not to update CATALOGUE_FILE, if CATALOGUE_SOURCE is equal to GOOGLE all scraped genres will be saved to CATALOGUE_FILE
|
||||||
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
|
|
|
@ -1,33 +0,0 @@
|
||||||
Warning: 1241 parsing failures.
|
|
||||||
row col expected actual file
|
|
||||||
1 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
|
||||||
2 is_system_app an integer FALSE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
|
||||||
3 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
|
||||||
4 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
|
||||||
5 is_system_app an integer TRUE 'data/raw/p011/phone_applications_foreground_with_datetime_with_categories.csv'
|
|
||||||
... ............. .......... ...... ...............................................................................
|
|
||||||
See problems(...) for more details.
|
|
||||||
|
|
||||||
Warning message:
|
|
||||||
The following named parsers don't match the column names: application_name
|
|
||||||
Error: Problem with `filter()` input `..1`.
|
|
||||||
✖ object 'application_name' not found
|
|
||||||
ℹ Input `..1` is `!is.na(application_name)`.
|
|
||||||
Backtrace:
|
|
||||||
█
|
|
||||||
1. ├─`%>%`(...)
|
|
||||||
2. ├─dplyr::mutate(...)
|
|
||||||
3. ├─utils::head(., -1)
|
|
||||||
4. ├─dplyr::select(., -c("timestamp"))
|
|
||||||
5. ├─dplyr::filter(., !is.na(application_name))
|
|
||||||
6. ├─dplyr:::filter.data.frame(., !is.na(application_name))
|
|
||||||
7. │ └─dplyr:::filter_rows(.data, ...)
|
|
||||||
8. │ ├─base::withCallingHandlers(...)
|
|
||||||
9. │ └─mask$eval_all_filter(dots, env_filter)
|
|
||||||
10. └─base::.handleSimpleError(...)
|
|
||||||
11. └─dplyr:::h(simpleError(msg, call))
|
|
||||||
Execution halted
|
|
||||||
[Mon Dec 13 17:19:06 2021]
|
|
||||||
Error in rule app_episodes:
|
|
||||||
jobid: 54
|
|
||||||
output: data/interim/p011/phone_app_episodes.csv
|
|
|
@ -1,5 +0,0 @@
|
||||||
Warning message:
|
|
||||||
In barnett_daily_features(snakemake) :
|
|
||||||
Barnett's location features cannot be computed for data or time segments that do not span one or more entire days (00:00:00 to 23:59:59). Values below point to the problem:
|
|
||||||
Location data rows within a daily time segment: 0
|
|
||||||
Location data time span in days: 398.6
|
|
|
@ -114,16 +114,7 @@ def input_tzcodes_file(wilcards):
|
||||||
if not config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"].lower().endswith(".csv"):
|
if not config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"].lower().endswith(".csv"):
|
||||||
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, instead you typed: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, instead you typed: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
||||||
if not Path(config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]).exists():
|
if not Path(config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]).exists():
|
||||||
try:
|
raise ValueError("[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file, the file in the path you typed does not exist: " + config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
||||||
config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
|
||||||
except KeyError:
|
|
||||||
raise ValueError("To create TZCODES_FILE, a list of timezones should be created " +
|
|
||||||
"with the rule preprocessing.smk/prepare_tzcodes_file " +
|
|
||||||
"which will create a file specified as config['TIMEZONE']['MULTIPLE']['TZ_FILE']." +
|
|
||||||
"\n An alternative is to provide the file manually:" +
|
|
||||||
"[TIMEZONE][MULTIPLE][TZCODES_FILE] should point to a CSV file," +
|
|
||||||
"but the file in the path you typed does not exist: " +
|
|
||||||
config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"])
|
|
||||||
return [config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]]
|
return [config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
|
@ -9,13 +9,13 @@ rule query_usernames_device_empatica_ids:
|
||||||
baseline_folder = "/mnt/e/STRAWbaseline/"
|
baseline_folder = "/mnt/e/STRAWbaseline/"
|
||||||
output:
|
output:
|
||||||
usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
usernames_file = config["CREATE_PARTICIPANT_FILES"]["USERNAMES_CSV"],
|
||||||
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
timezone_file = "data/external/timezone.csv"
|
||||||
script:
|
script:
|
||||||
"../../participants/prepare_usernames_file.py"
|
"../../participants/prepare_usernames_file.py"
|
||||||
|
|
||||||
rule prepare_tzcodes_file:
|
rule prepare_tzcodes_file:
|
||||||
input:
|
input:
|
||||||
timezone_file = config["TIMEZONE"]["MULTIPLE"]["TZ_FILE"]
|
timezone_file = "data/external/timezone.csv"
|
||||||
output:
|
output:
|
||||||
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
|
tzcodes_file = config["TIMEZONE"]["MULTIPLE"]["TZCODES_FILE"]
|
||||||
script:
|
script:
|
||||||
|
|
|
@ -29,7 +29,6 @@ get_genre <- function(apps){
|
||||||
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
apps <- read.csv(snakemake@input[[1]], stringsAsFactors = F)
|
||||||
genre_catalogue <- data.frame()
|
genre_catalogue <- data.frame()
|
||||||
catalogue_source <- snakemake@params[["catalogue_source"]]
|
catalogue_source <- snakemake@params[["catalogue_source"]]
|
||||||
package_names_hashed <- snakemake@params[["package_names_hashed"]]
|
|
||||||
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
update_catalogue_file <- snakemake@params[["update_catalogue_file"]]
|
||||||
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
scrape_missing_genres <- snakemake@params[["scrape_missing_genres"]]
|
||||||
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
apps_with_genre <- data.frame(matrix(ncol=length(colnames(apps)) + 1,nrow=0, dimnames=list(NULL, c(colnames(apps), "genre"))))
|
||||||
|
@ -39,11 +38,7 @@ if(nrow(apps) > 0){
|
||||||
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
apps_with_genre <- apps %>% mutate(genre = NA_character_)
|
||||||
} else if(catalogue_source == "FILE"){
|
} else if(catalogue_source == "FILE"){
|
||||||
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
genre_catalogue <- read.csv(snakemake@params[["catalogue_file"]], colClasses = c("character", "character"))
|
||||||
if (package_names_hashed) {
|
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
||||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_hash")
|
|
||||||
} else {
|
|
||||||
apps_with_genre <- left_join(apps, genre_catalogue, by = "package_name")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
if(catalogue_source == "GOOGLE" || (catalogue_source == "FILE" && scrape_missing_genres)){
|
||||||
|
|
|
@ -67,12 +67,10 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
APPLICATION_NAME: FLAG_TO_MUTATE
|
|
||||||
IS_SYSTEM_APP: is_system_app
|
IS_SYSTEM_APP: is_system_app
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS:
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/phone/straw/app_add_name.py
|
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
@ -80,13 +78,11 @@ PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
TIMESTAMP: timestamp
|
TIMESTAMP: timestamp
|
||||||
DEVICE_ID: device_id
|
DEVICE_ID: device_id
|
||||||
PACKAGE_NAME: package_hash
|
PACKAGE_NAME: package_hash
|
||||||
APPLICATION_NAME: FLAG_TO_MUTATE
|
|
||||||
SOUND: sound
|
SOUND: sound
|
||||||
VIBRATE: vibrate
|
VIBRATE: vibrate
|
||||||
MUTATION:
|
MUTATION:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS:
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
- src/data/streams/mutations/phone/straw/app_add_name.py
|
|
||||||
|
|
||||||
PHONE_BATTERY:
|
PHONE_BATTERY:
|
||||||
ANDROID:
|
ANDROID:
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
def main(data, stream_parameters):
|
|
||||||
data["application_name"] = "hashed"
|
|
||||||
return(data)
|
|
|
@ -27,14 +27,12 @@ PHONE_APPLICATIONS_FOREGROUND:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
- APPLICATION_NAME
|
|
||||||
- IS_SYSTEM_APP
|
- IS_SYSTEM_APP
|
||||||
|
|
||||||
PHONE_APPLICATIONS_NOTIFICATIONS:
|
PHONE_APPLICATIONS_NOTIFICATIONS:
|
||||||
- TIMESTAMP
|
- TIMESTAMP
|
||||||
- DEVICE_ID
|
- DEVICE_ID
|
||||||
- PACKAGE_NAME
|
- PACKAGE_NAME
|
||||||
- APPLICATION_NAME
|
|
||||||
- SOUND
|
- SOUND
|
||||||
- VIBRATE
|
- VIBRATE
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,6 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
for columns in columns_for_full_index:
|
for columns in columns_for_full_index:
|
||||||
full_index = full_index + columns
|
full_index = full_index + columns
|
||||||
full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
|
full_index = pd.MultiIndex.from_tuples(full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"))
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"], keep="first")
|
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
|
phone_data_yield_per_segment = phone_data_yield_per_segment.set_index(["local_segment_start_datetimes", "minutes_after_segment_start"]).reindex(full_index).reset_index().fillna(0)
|
||||||
|
|
||||||
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
||||||
|
|
|
@ -22,7 +22,7 @@ output:
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
```{r include=FALSE}
|
```{r include=FALSE}
|
||||||
source("/mnt/c/Users/junos/Documents/FWO-ARRS/Analysis/straw2analysis/rapids/renv/activate.R")
|
source("renv/activate.R")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue