Add "corr_method" param to heatmap_features_correlations.py

pull/95/head
Meng Li 2020-07-22 20:00:14 -04:00
parent 6d31e6734d
commit b0354393ae
3 changed files with 4 additions and 2 deletions

View File

@@ -208,6 +208,7 @@ HEATMAP_FEATURES_CORRELATIONS:
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
CORR_THRESHOLD: 0.1
+ CORR_METHOD: "pearson" # choose from {"pearson", "kendall", "spearman"}
HISTOGRAM_VALID_SENSED_HOURS:
PLOT: False

View File

@@ -4,7 +4,8 @@ rule heatmap_features_correlations:
phone_valid_sensed_days = expand("data/interim/{pid}/phone_valid_sensed_days_{{min_valid_hours_per_day}}h.csv", pid=config["PIDS"])
params:
min_rows_ratio = config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_ROWS_RATIO"],
- corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"] #0.75
+ corr_threshold = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_THRESHOLD"],
+ corr_method = config["HEATMAP_FEATURES_CORRELATIONS"]["CORR_METHOD"]
output:
"reports/data_exploration/{min_valid_hours_per_day}h/heatmap_features_correlations.html"
script:

View File

@@ -50,7 +50,7 @@ features = features.loc[features.index.intersection(selected_participants_and_da
# get correlation matrix
features = features.astype(float)
- corr_matrix = features.corr(min_periods=min_rows_ratio * features.shape[0])
+ corr_matrix = features.corr(method=snakemake.params["corr_method"], min_periods=min_rows_ratio * features.shape[0])
# replace correlation coefficients less than corr_threshold with NA
corr_matrix[(corr_matrix > -corr_threshold) & (corr_matrix < corr_threshold)] = np.nan