diff --git a/config.yaml b/config.yaml
index c294f9a4..a11034a2 100644
--- a/config.yaml
+++ b/config.yaml
@@ -208,6 +208,7 @@ HEATMAP_FEATURES_CORRELATIONS:
   PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
   FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
   CORR_THRESHOLD: 0.1
+  CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
 
 HISTOGRAM_VALID_SENSED_HOURS:
   PLOT: False
diff --git a/rules/reports.snakefile b/rules/reports.snakefile
index 54d6fd36..da9a0809 100644
--- a/rules/reports.snakefile
+++ b/rules/reports.snakefile
@@ -4,7 +4,8 @@ rule heatmap_features_correlations:
         phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
     params:
         min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
-        corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
+        corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
+        corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
     output:
         "reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
     script:
diff --git a/src/visualization/heatmap_features_correlations.py b/src/visualization/heatmap_features_correlations.py
index c8793765..8093a9db 100644
--- a/src/visualization/heatmap_features_correlations.py
+++ b/src/visualization/heatmap_features_correlations.py
@@ -50,7 +50,8 @@ features = features.loc[features.index.intersection(selected_participants_and_da
 
 # get correlation matrix
 features = features.astype(float)
-corr_matrix = features.corr(min_periods=min_rows_ratio * features.shape[0])
+# DataFrame.corr documents min_periods as an int; the ratio-scaled row count may be a float, so convert it explicitly
+corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=int(min_rows_ratio * features.shape[0]))
 
 # replace correlation coefficients less than corr_threshold with NA
 corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan