diff --git a/src/features/applications_foreground_features.py b/src/features/applications_foreground_features.py deleted file mode 100644 index 9863381c..00000000 --- a/src/features/applications_foreground_features.py +++ /dev/null @@ -1,36 +0,0 @@ -import pandas as pd -from applications_foreground.applications_foreground_base import base_applications_foreground_features - -apps_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"], encoding="ISO-8859-1") -day_segment = snakemake.params["day_segment"] -single_categories = snakemake.params["single_categories"] -multiple_categories_with_genres = snakemake.params["multiple_categories"] -single_apps = snakemake.params["single_apps"] -excluded_categories = snakemake.params["excluded_categories"] -excluded_apps = snakemake.params["excluded_apps"] -requested_features = snakemake.params["features"] -apps_features = pd.DataFrame(columns=["local_date"]) - -single_categories = list(set(single_categories) - set(excluded_categories)) -multiple_categories = list(multiple_categories_with_genres.keys() - set(excluded_categories)) -apps = list(set(single_apps) - set(excluded_apps)) -type_count = len(single_categories) + len(multiple_categories) + len(apps) - -params = {} -params["multiple_categories_with_genres"] = multiple_categories_with_genres -params["single_categories"] = single_categories -params["multiple_categories"] = multiple_categories -params["apps"] = apps - -# exclude categories in the excluded_categories list -if "system_apps" in excluded_categories: - apps_data = apps_data[apps_data["is_system_app"] == 0] -apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)] -# exclude apps in the excluded_apps list -apps_data = apps_data[~apps_data["package_name"].isin(excluded_apps)] - -apps_features = apps_features.merge(base_applications_foreground_features(apps_data, day_segment, requested_features, params), on="local_date", how="outer") - -assert len(requested_features) * type_count + 1 == apps_features.shape[1], "The number of features in the output dataframe (=" + str(apps_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your application foreground feature extraction functions" - -apps_features.to_csv(snakemake.output[0], index=False) diff --git a/src/features/bluetooth_features.R b/src/features/bluetooth_features.R deleted file mode 100644 index de072a29..00000000 --- a/src/features/bluetooth_features.R +++ /dev/null @@ -1,20 +0,0 @@ -source("renv/activate.R") -source("src/features/bluetooth/bluetooth_base.R") -library(dplyr) -library(tidyr) - -bluetooth_data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE) -day_segments <- read.csv(snakemake@input[["day_segments"]], stringsAsFactors = FALSE) -requested_features <- snakemake@params[["features"]] -features = data.frame(local_date = character(), stringsAsFactors = FALSE) - -day_segments <- day_segments %>% distinct(label) %>% pull(label) -# Compute base bluetooth features -for (day_segment in day_segments) - features <- merge(features, base_bluetooth_features(bluetooth_data, day_segment, requested_features), by="local_date", all = TRUE) - -if(ncol(features) != (length(requested_features)) * length(day_segments) + 1) - stop(paste0("The number of features in the output dataframe (=", ncol(features),") does not match the expected value (=", length(requested_features)," + 1). Verify your bluetooth feature extraction functions")) - - -write.csv(features, snakemake@output[[1]], row.names = FALSE) \ No newline at end of file diff --git a/src/features/build_features.py b/src/features/build_features.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/features/call_features.R b/src/features/call_features.R deleted file mode 100644 index 8392809e..00000000 --- a/src/features/call_features.R +++ /dev/null @@ -1,23 +0,0 @@ -source("renv/activate.R") -source("src/features/call/call_base.R") -library(dplyr) - -calls <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE) -day_segments_labels <- read.csv(snakemake@input[["day_segments_labels"]]) -requested_features <- snakemake@params[["features"]] -call_type <- snakemake@params[["call_type"]] -features = data.frame(local_segment = character(), stringsAsFactors = FALSE) - -day_segments <- day_segments_labels %>% pull(label) -for (day_segment in day_segments) - features <- merge(features, base_call_features(calls, call_type, day_segment, requested_features), all = TRUE) - -if(ncol(features) != length(requested_features) + 1) - stop(paste0("The number of features in the output dataframe (=", ncol(features),") does not match the expected value (=", length(requested_features)," + 1). Verify your Call feature extraction functions")) - -features <- features %>% separate(col = local_segment, - into = c("local_segment_label", "local_start_date", "local_start_time", "local_end_date", "local_end_time"), - sep = "#", - remove = FALSE) - -write.csv(features, snakemake@output[[1]], row.names = FALSE) diff --git a/src/features/conversation_features.py b/src/features/conversation_features.py deleted file mode 100644 index 3b0f6b29..00000000 --- a/src/features/conversation_features.py +++ /dev/null @@ -1,15 +0,0 @@ -import pandas as pd -from conversation.conversation_base import base_conversation_features - -conversation_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"]) -day_segment = snakemake.params["day_segment"] -requested_features = snakemake.params["features"] -recordingMinutes = snakemake.params["recordingMinutes"] -pausedMinutes = snakemake.params["pausedMinutes"] -expectedMinutes = 1440 / (recordingMinutes + pausedMinutes) -conversation_features = pd.DataFrame(columns=["local_date"]) - -conversation_features = conversation_features.merge(base_conversation_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes), on="local_date", how="outer") -assert len(requested_features) + 1 == conversation_features.shape[1], "The number of features in the output dataframe (=" + str(conversation_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your conversation feature extraction functions" - -conversation_features.to_csv(snakemake.output[0], index=False) \ No newline at end of file diff --git a/src/features/light_features.py b/src/features/light_features.py deleted file mode 100644 index fd6f06ee..00000000 --- a/src/features/light_features.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd -from light.light_base import base_light_features - -light_data = pd.read_csv(snakemake.input[0], parse_dates=["local_date_time", "local_date"]) -day_segment = snakemake.params["day_segment"] -requested_features = snakemake.params["features"] -light_features = pd.DataFrame(columns=["local_date"]) - -light_features = light_features.merge(base_light_features(light_data, day_segment, requested_features), on="local_date", how="outer") - -assert len(requested_features) + 1 == light_features.shape[1], "The number of features in the output dataframe (=" + str(light_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your light feature extraction functions" - -light_features.to_csv(snakemake.output[0], index=False) \ No newline at end of file diff --git a/src/features/messages_features.R b/src/features/messages_features.R deleted file mode 100644 index 4602aa99..00000000 --- a/src/features/messages_features.R +++ /dev/null @@ -1,26 +0,0 @@ -# If you want to implement extra features, source(..) a new file and duplicate the line "features <- merge(...)", then -# swap base_sms_features(...) for your own function - -source("renv/activate.R") -source("src/features/messages/messages_base.R") -library("dplyr", warn.conflicts = FALSE) - -messages <- read.csv(snakemake@input[[1]]) -day_segments_labels <- read.csv(snakemake@input[["day_segments_labels"]]) -requested_features <- snakemake@params[["features"]] -messages_type <- snakemake@params[["messages_type"]] -features <- data.frame(local_segment = character(), stringsAsFactors = FALSE) - -day_segments <- day_segments_labels %>% pull(label) -for (day_segment in day_segments) - features <- merge(features, base_messages_features(messages, messages_type, day_segment, requested_features), all = TRUE) - -if(ncol(features) != length(requested_features) + 1) - stop(paste0("The number of features in the output dataframe (=", ncol(features),") does not match the expected value (=", length(requested_features)," + 1). Verify your Messages (SMS) feature extraction functions")) - -features <- features %>% separate(col = local_segment, - into = c("local_segment_label", "local_start_date", "local_start_time", "local_end_date", "local_end_time"), - sep = "#", - remove = FALSE) - -write.csv(features, snakemake@output[[1]], row.names = FALSE) diff --git a/src/features/wifi_features.R b/src/features/wifi_features.R deleted file mode 100644 index 864ca5b9..00000000 --- a/src/features/wifi_features.R +++ /dev/null @@ -1,33 +0,0 @@ -source("renv/activate.R") -source("src/features/wifi/wifi_base.R") -library("dplyr") - -if(!is.null(snakemake@input[["visible_access_points"]]) && is.null(snakemake@input[["connected_access_points"]])){ - wifi_data <- read.csv(snakemake@input[["visible_access_points"]], stringsAsFactors = FALSE) - wifi_data <- wifi_data %>% mutate(connected = 0) -} else if(is.null(snakemake@input[["visible_access_points"]]) && !is.null(snakemake@input[["connected_access_points"]])){ - wifi_data <- read.csv(snakemake@input[["connected_access_points"]], stringsAsFactors = FALSE) - wifi_data <- wifi_data %>% mutate(connected = 1) -} else if(!is.null(snakemake@input[["visible_access_points"]]) && !is.null(snakemake@input[["connected_access_points"]])){ - visible_access_points <- read.csv(snakemake@input[["visible_access_points"]], stringsAsFactors = FALSE) - visible_access_points <- visible_access_points %>% mutate(connected = 0) - connected_access_points <- read.csv(snakemake@input[["connected_access_points"]], stringsAsFactors = FALSE) - connected_access_points <- connected_access_points %>% mutate(connected = 1) - wifi_data <- bind_rows(visible_access_points, connected_access_points) %>% arrange(timestamp) -} - -wifi_data <- read.csv(snakemake@input[[1]], stringsAsFactors = FALSE) -day_segments <- read.csv(snakemake@input[["day_segments"]]) -requested_features <- snakemake@params[["features"]] -features = data.frame(local_date = character(), stringsAsFactors = FALSE) - - -day_segments <- day_segments %>% distinct(label) %>% pull(label) -# Compute base wifi features -for (day_segment in day_segments) - features <- merge(features, base_wifi_features(wifi_data, day_segment, requested_features), by="local_date", all = TRUE) - -if(ncol(features) != (length(requested_features)) * length(day_segments) + 1) - stop(paste0("The number of features in the output dataframe (=", ncol(features),") does not match the expected value (=", length(requested_features)," + 1). Verify your wifi feature extraction functions")) - -write.csv(features, snakemake@output[[1]], row.names = FALSE)