Merge branch 'imputation_and_cleaning' of https://repo.ijs.si/junoslukan/rapids into imputation_and_cleaning
commit
0d143e6aad
|
@ -5,15 +5,11 @@ options(scipen=999)
|
||||||
|
|
||||||
assign_rows_to_segments <- function(data, segments){
|
assign_rows_to_segments <- function(data, segments){
|
||||||
# This function is used by all segment types, we use data.tables because they are fast
|
# This function is used by all segment types, we use data.tables because they are fast
|
||||||
print(nrow(data))
|
|
||||||
print(ncol(data))
|
|
||||||
|
|
||||||
data <- data.table::as.data.table(data)
|
data <- data.table::as.data.table(data)
|
||||||
data[, assigned_segments := ""]
|
data[, assigned_segments := ""]
|
||||||
for(i in seq_len(nrow(segments))) {
|
for(i in seq_len(nrow(segments))) {
|
||||||
segment <- segments[i,]
|
segment <- segments[i,]
|
||||||
print(segment)
|
|
||||||
print(data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp])
|
|
||||||
|
|
||||||
data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp,
|
data[segment$segment_start_ts<= timestamp & segment$segment_end_ts >= timestamp,
|
||||||
assigned_segments := stringi::stri_c(assigned_segments, segment$segment_id, sep = "|")]
|
assigned_segments := stringi::stri_c(assigned_segments, segment$segment_id, sep = "|")]
|
||||||
|
@ -21,12 +17,6 @@ assign_rows_to_segments <- function(data, segments){
|
||||||
|
|
||||||
data[,assigned_segments:=substring(assigned_segments, 2)]
|
data[,assigned_segments:=substring(assigned_segments, 2)]
|
||||||
data
|
data
|
||||||
|
|
||||||
test <- # print multiple columns
|
|
||||||
data %>%
|
|
||||||
dplyr::filter(is.na(assigned_segments))
|
|
||||||
|
|
||||||
test %>% as_tibble() %>% print(n=50)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz){
|
assign_to_time_segment <- function(sensor_data, time_segments, time_segments_type, include_past_periodic_segments, most_common_tz){
|
||||||
|
|
|
@ -192,7 +192,7 @@ def straw_cleaning(sensor_data_files, provider, target):
|
||||||
if esm not in features:
|
if esm not in features:
|
||||||
features[esm] = esm_cols[esm]
|
features[esm] = esm_cols[esm]
|
||||||
|
|
||||||
graph_bf_af(features, "11correlation_drop")
|
graph_bf_af(features, "10correlation_drop")
|
||||||
|
|
||||||
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
# (10) VERIFY IF THERE ARE ANY NANS LEFT IN THE DATAFRAME
|
||||||
if features.isna().any().any():
|
if features.isna().any().any():
|
||||||
|
|
Loading…
Reference in New Issue