Merge branch 'imputation_and_cleaning' of https://repo.ijs.si/junoslukan/rapids into imputation_and_cleaning

imputation_and_cleaning
Primoz 2022-10-25 15:28:27 +00:00
commit 0d143e6aad
2 changed files with 1 addition and 11 deletions

View File

@ -5,15 +5,11 @@ options(scipen=999)
assign_rows_to_segments <- function(data, segments){ assign_rows_to_segments <- function(data, segments){
# This function is used by all segment types, we use data.tables because they are fast # This function is used by all segment types, we use data.tables because they are fast
print(nrow(data))
print(ncol(data))
data <- data.table::as.data.table(data) data <- data.table::as.data.table(data)
data[, assigned_segments := ""] data[, assigned_segments := ""]
for(i in seq_len(nrow(segments))) { for(i in seq_len(nrow(segments))) {
segment <- segments[i,] segment <- segments[i,]
print(segment)
print(data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp])
data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp, data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp,
assigned_segments := stringi::stri_c(assigned_segments, segment$segment_id, sep = "|")] assigned_segments := stringi::stri_c(assigned_segments, segment$segment_id, sep = "|")]
@ -21,12 +17,6 @@ assign_rows_to_segments <- function(data, segments){
data[,assigned_segments:=substring(assigned_segments, 2)] data[,assigned_segments:=substring(assigned_segments, 2)]
data data
test <- # print multiple columns
data %>%
dplyr::filter(is.na(assigned_segments))
test %>% as_tibble() %>% print(n=50)
} }
assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz){ assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz){

View File

@ -192,7 +192,7 @@ def straw_cleaning(sensor_data_files, provider, target):
if esm not in features: if esm not in features:
features[esm] = esm_cols[esm] features[esm] = esm_cols[esm]
graph_bf_af(features, "11correlation_drop") graph_bf_af(features, "10correlation_drop")
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME # (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
if features.isna().any().any(): if features.isna().any().any():