From e88bbd548fcd2bb1f9057b27b572afa5a6d028a5 Mon Sep 17 00:00:00 2001 From: Primoz Date: Tue, 18 Oct 2022 09:15:00 +0000 Subject: [PATCH] Add new daily segment and filter by segment in the cleaning script. --- data/external/timesegments_daily.csv | 1 + src/features/all_cleaning_overall/straw/main.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/data/external/timesegments_daily.csv b/data/external/timesegments_daily.csv index 605a4a53..183245b9 100644 --- a/data/external/timesegments_daily.csv +++ b/data/external/timesegments_daily.csv @@ -1,2 +1,3 @@ label,start_time,length,repeats_on,repeats_value daily,04:00:00,23H 59M 59S,every_day,0 +working_day,04:00:00,18H 00M 00S,every_day,0 diff --git a/src/features/all_cleaning_overall/straw/main.py b/src/features/all_cleaning_overall/straw/main.py index 40e346cc..afa371f7 100644 --- a/src/features/all_cleaning_overall/straw/main.py +++ b/src/features/all_cleaning_overall/straw/main.py @@ -11,8 +11,10 @@ sys.path.append('/rapids/') from src.features import empatica_data_yield as edy def straw_cleaning(sensor_data_files, provider, target): - + features = pd.read_csv(sensor_data_files["sensor_data"][0]) + + features = features[features['local_segment_label'] == 'working_day'] # Filtriranje ustreznih časovnih segmentov esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns @@ -160,11 +162,11 @@ def straw_cleaning(sensor_data_files, provider, target): # plt.savefig(f'correlation_matrix.png', bbox_inches='tight') # plt.close() - s = corr_matrix.unstack() - so = s.sort_values(ascending=False) + # s = corr_matrix.unstack() + # so = s.sort_values(ascending=False) - pd.set_option('display.max_rows', None) - sorted_upper = upper.unstack().sort_values(ascending=False) + # pd.set_option('display.max_rows', None) + # sorted_upper = upper.unstack().sort_values(ascending=False) # print(sorted_upper[sorted_upper > drop_corr_features["CORR_THRESHOLD"]]) 
features.drop(to_drop, axis=1, inplace=True) @@ -196,7 +198,7 @@ def impute(df, method='zero'): 'knn': k_nearest(df) }[method] -def graph_bf_af(features, phase_name, plt_flag=False): +def graph_bf_af(features, phase_name, plt_flag=True): if plt_flag: sns.set(rc={"figure.figsize":(16, 8)}) sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)