Add new daily segment and filter by segment in the cleaning script.
parent
de15a52dba
commit
e88bbd548f
|
@ -1,2 +1,3 @@
|
|||
label,start_time,length,repeats_on,repeats_value
|
||||
daily,04:00:00,23H 59M 59S,every_day,0
|
||||
working_day,04:00:00,18H 00M 00S,every_day,0
|
||||
|
|
|
|
@ -14,6 +14,8 @@ def straw_cleaning(sensor_data_files, provider, target):
|
|||
|
||||
features = pd.read_csv(sensor_data_files["sensor_data"][0])
|
||||
|
||||
features = features[features['local_segment_label'] == 'working_day'] # Filter to the relevant time segments
|
||||
|
||||
esm_cols = features.loc[:, features.columns.str.startswith('phone_esm_straw')] # Get target (esm) columns
|
||||
|
||||
with open('config.yaml', 'r') as stream:
|
||||
|
@ -160,11 +162,11 @@ def straw_cleaning(sensor_data_files, provider, target):
|
|||
# plt.savefig(f'correlation_matrix.png', bbox_inches='tight')
|
||||
# plt.close()
|
||||
|
||||
s = corr_matrix.unstack()
|
||||
so = s.sort_values(ascending=False)
|
||||
# s = corr_matrix.unstack()
|
||||
# so = s.sort_values(ascending=False)
|
||||
|
||||
pd.set_option('display.max_rows', None)
|
||||
sorted_upper = upper.unstack().sort_values(ascending=False)
|
||||
# pd.set_option('display.max_rows', None)
|
||||
# sorted_upper = upper.unstack().sort_values(ascending=False)
|
||||
# print(sorted_upper[sorted_upper > drop_corr_features["CORR_THRESHOLD"]])
|
||||
|
||||
features.drop(to_drop, axis=1, inplace=True)
|
||||
|
@ -196,7 +198,7 @@ def impute(df, method='zero'):
|
|||
'knn': k_nearest(df)
|
||||
}[method]
|
||||
|
||||
def graph_bf_af(features, phase_name, plt_flag=False):
|
||||
def graph_bf_af(features, phase_name, plt_flag=True):
|
||||
if plt_flag:
|
||||
sns.set(rc={"figure.figsize":(16, 8)})
|
||||
sns.heatmap(features.isna(), cbar=False) #features.select_dtypes(include=np.number)
|
||||
|
|
Loading…
Reference in New Issue