Add a new daily-repeating `working_day` segment and filter features by that segment in the cleaning script.

notes
Primoz 2022-10-18 09:15:00 +00:00
parent de15a52dba
commit e88bbd548f
2 changed files with 9 additions and 6 deletions

View File

@ -1,2 +1,3 @@
label,start_time,length,repeats_on,repeats_value label,start_time,length,repeats_on,repeats_value
daily,04:00:00,23H 59M 59S,every_day,0 daily,04:00:00,23H 59M 59S,every_day,0
working_day,04:00:00,18H 00M 00S,every_day,0

1 label start_time length repeats_on repeats_value
2 daily 04:00:00 23H 59M 59S every_day 0
3 working_day 04:00:00 18H 00M 00S every_day 0

View File

@ -11,8 +11,10 @@ sys.path.append('/rapids/')
from src.features import empatica_data_yield as edy from src.features import empatica_data_yield as edy
def straw_cleaning(sensor_data_files, provider, target): def straw_cleaning(sensor_data_files, provider, target):
features = pd.read_csv(sensor_data_files["sensor_data"][0]) features = pd.read_csv(sensor_data_files["sensor_data"][0])
features = features[features['local_segment_label'] == 'working_day'] # Keep only rows from the relevant time segment
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
@ -160,11 +162,11 @@ def straw_cleaning(sensor_data_files, provider, target):
# plt.savefig(f'correlation_matrix.png', bbox_inches='tight') # plt.savefig(f'correlation_matrix.png', bbox_inches='tight')
# plt.close() # plt.close()
s = corr_matrix.unstack() # s = corr_matrix.unstack()
so = s.sort_values(ascending=False) # so = s.sort_values(ascending=False)
pd.set_option('display.max_rows', None) # pd.set_option('display.max_rows', None)
sorted_upper = upper.unstack().sort_values(ascending=False) # sorted_upper = upper.unstack().sort_values(ascending=False)
# print(sorted_upper[sorted_upper > drop_corr_features["CORR_THRESHOLD"]]) # print(sorted_upper[sorted_upper > drop_corr_features["CORR_THRESHOLD"]])
features.drop(to_drop, axis=1, inplace=True) features.drop(to_drop, axis=1, inplace=True)
@ -196,7 +198,7 @@ def impute(df, method='zero'):
'knn': k_nearest(df) 'knn': k_nearest(df)
}[method] }[method]
def graph_bf_af(features, phase_name, plt_flag=False): def graph_bf_af(features, phase_name, plt_flag=True):
if plt_flag: if plt_flag:
sns.set(rc={"figure.figsize":(16, 8)}) sns.set(rc={"figure.figsize":(16, 8)})
sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number) sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)