Merge commit 'c05b047c2d9452151553961928c846c01d7395bc'
commit
ce04394679
|
@ -95,7 +95,7 @@ if not participant_info.empty:
|
||||||
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
|
- limesurvey_demand.loc[rows_demand_reverse, "score_original"]
|
||||||
)
|
)
|
||||||
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
|
baseline_interim = pd.concat([baseline_interim, limesurvey_demand], axis=0, ignore_index=True)
|
||||||
if "demand" in requested_features:
|
if "limesurvey_demand" in requested_features:
|
||||||
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
baseline_features.loc[0, "limesurvey_demand"] = limesurvey_demand[
|
||||||
"score"
|
"score"
|
||||||
].sum()
|
].sum()
|
||||||
|
@ -136,9 +136,12 @@ if not participant_info.empty:
|
||||||
].sum()
|
].sum()
|
||||||
|
|
||||||
if "limesurvey_demand_control_ratio" in requested_features:
|
if "limesurvey_demand_control_ratio" in requested_features:
|
||||||
|
if limesurvey_control["score"].sum():
|
||||||
limesurvey_demand_control_ratio = (
|
limesurvey_demand_control_ratio = (
|
||||||
limesurvey_demand["score"].sum() / limesurvey_control["score"].sum()
|
limesurvey_demand["score"].sum() / limesurvey_control["score"].sum()
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
limesurvey_demand_control_ratio = 0
|
||||||
if (
|
if (
|
||||||
JCQ_NORMS[participant_info.loc[0, "gender"]][0]
|
JCQ_NORMS[participant_info.loc[0, "gender"]][0]
|
||||||
<= limesurvey_demand_control_ratio
|
<= limesurvey_demand_control_ratio
|
||||||
|
|
|
@ -44,11 +44,11 @@ rapids_cleaning <- function(sensor_data_files, provider){
|
||||||
|
|
||||||
# Drop columns with a percentage of NA values above cols_nan_threshold
|
# Drop columns with a percentage of NA values above cols_nan_threshold
|
||||||
if(nrow(clean_features))
|
if(nrow(clean_features))
|
||||||
clean_features <- clean_features %>% select_if(~ sum(is.na(.)) / length(.) <= cols_nan_threshold )
|
clean_features <- clean_features %>% select(where(~ sum(is.na(.)) / length(.) <= cols_nan_threshold ), starts_with("phone_esm"))
|
||||||
|
|
||||||
# Drop columns with zero variance
|
# Drop columns with zero variance
|
||||||
if(drop_zero_variance_columns)
|
if(drop_zero_variance_columns)
|
||||||
clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime",names(.)) | sapply(., n_distinct, na.rm = T) > 1)
|
clean_features <- clean_features %>% select_if(grepl("pid|local_segment|local_segment_label|local_segment_start_datetime|local_segment_end_datetime|phone_esm",names(.)) | sapply(., n_distinct, na.rm = T) > 1)
|
||||||
|
|
||||||
# Drop highly correlated features
|
# Drop highly correlated features
|
||||||
if(as.logical(drop_highly_correlated_features$COMPUTE)){
|
if(as.logical(drop_highly_correlated_features$COMPUTE)){
|
||||||
|
|
Loading…
Reference in New Issue