"""Compare z-score standardization strategies for Empatica temperature features.

Three variants of the second-order (SO) features are built from the same
window-level CSV:

* Z1   - the window-level feature columns are standardized first, then the
         SO features are extracted from the standardized windows;
* Z2   - the SO features are extracted from the raw windows and standardized
         afterwards;
* Z1_z - the Z1 features standardized once more.

For the SO ``sd`` and ``nlargest`` features of ``squareSumOfComponent_X`` the
script saves a three-panel comparison figure and prints the three variants
side by side.
"""
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# The search path has to be extended before the project-local import below
# can be resolved, so this statement must stay ahead of it.
sys.path.append('/rapids/')
from src.features import cr_features_helper_methods as crhm  # noqa: E402

pd.set_option("display.max_columns", None)

WINDOWS_CSV = "data/interim/p031/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"
FEATURE_PREFIX = "empatica_temperature_cr_"
SO_FEATURES = ('mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows')

# Columns that are left untouched by the window-level standardization.
EXCLUDED_COLUMNS = [
    'local_segment',
    'local_segment_label',
    'local_segment_start_datetime',
    'local_segment_end_datetime',
    "empatica_temperature_cr_level_1",
]

# Number of leading columns dropped from the extracted SO feature table.
# NOTE(review): presumably the segment metadata columns - confirm against
# crhm.extract_second_order_features.
N_LEADING_COLUMNS = 4

# Subplot titles are runtime strings and are kept exactly as they were.
# English meaning of the Slovenian titles:
#   "Z1 - standardized windows, then extraction of SO features"
#   "Z2 - extracted SO features of 'normal' values, then standardization"
#   "Standardized Z1"
TITLE_Z1 = "Z1 - standardizirana okna, nato ekstrahiranje značilk SO"
TITLE_Z2_LONG = "Z2 - ekstrahirane značilke SO 'normalnih' vrednosti, nato standardizacija"


def standardize(frame):
    """Return a z-scored copy of *frame* with its column labels and index kept.

    Keeping the index matters: the scaler returns a bare array, and a frame
    rebuilt without ``index=`` gets a fresh RangeIndex that would no longer
    align with frames still carrying the original index.
    """
    return pd.DataFrame(
        StandardScaler().fit_transform(frame),
        columns=frame.columns,
        index=frame.index,
    )


def extract_so_features(windows):
    """Extract the second-order features of *windows* without the leading columns."""
    # A fresh list is passed on every call, exactly as the original literals did.
    so_features = crhm.extract_second_order_features(
        windows, list(SO_FEATURES), prefix=FEATURE_PREFIX
    )
    return so_features.iloc[:, N_LEADING_COLUMNS:]


def plot_comparison(column, variants, titles, suptitle, output_name):
    """Plot *column* of every frame in *variants* on stacked axes and save the figure.

    Args:
        column: Name of the feature column to plot.
        variants: Data frames to compare, one subplot per frame.
        titles: Subplot titles, in the same order as *variants*.
        suptitle: Title of the whole figure.
        output_name: Path handed to ``Figure.savefig``.
    """
    fig, axs = plt.subplots(len(variants), figsize=(8, 10), squeeze=False)
    for ax, frame, title in zip(axs.ravel(), variants, titles):
        ax.plot(frame[column])
        ax.set_title(title)
    fig.suptitle(suptitle)
    # Save through the figure object instead of pyplot's implicit "current
    # figure", then release it so figures do not pile up in memory.
    fig.savefig(output_name, bbox_inches='tight')
    plt.close(fig)


def build_showcase(column, label, z1, z2, z1_z):
    """Return a frame holding *column* of the three variants side by side."""
    showcase = pd.DataFrame()
    # Column-by-column assignment aligns every variant on the index of z1.
    showcase[f'Z1__{label}'] = z1[column]
    showcase[f'Z2__{label}'] = z2[column]
    showcase[f'Z1__{label}_STANDARDIZED'] = z1_z[column]
    return showcase


features_win = pd.read_csv(WINDOWS_CSV, usecols=[0, 1, 2, 3, 4, 5])

# First standardization method: standardize the windows, then extract SO features.
z1_windows = features_win.copy()
feature_mask = ~z1_windows.columns.isin(EXCLUDED_COLUMNS)
z1_windows.loc[:, feature_mask] = StandardScaler().fit_transform(
    z1_windows.loc[:, feature_mask]
)
z1 = extract_so_features(z1_windows)

# Second standardization method: extract SO features, then standardize them.
so_features_reg = extract_so_features(features_win)
z2 = standardize(so_features_reg)

# Standardization of the first standardization method values.
z1_z = standardize(z1)

# For SD
plot_comparison(
    'empatica_temperature_cr_squareSumOfComponent_X_SO_sd',
    (z1, z2, z1_z),
    (TITLE_Z1, TITLE_Z2_LONG, "Standardiziran Z1"),
    'Z-Score methods for temperature_squareSumOfComponent_SO_sd',
    'z_score_comparison_temperature_squareSumOfComponent_X_SO_sd',
)
showcase = build_showcase(
    'empatica_temperature_cr_squareSumOfComponent_X_SO_sd', 'SD', z1, z2, z1_z
)
print(showcase)

# For nlargest
plot_comparison(
    'empatica_temperature_cr_squareSumOfComponent_X_SO_nlargest',
    (z1, z2, z1_z),
    (TITLE_Z1, "Z2", "Standardized Z1"),
    'Z-Score methods for temperature_squareSumOfComponent_SO_nlargest',
    'z_score_comparison_temperature_squareSumOfComponent_X_SO_nlargest',
)
showcase2 = build_showcase(
    'empatica_temperature_cr_squareSumOfComponent_X_SO_nlargest', 'nlargest', z1, z2, z1_z
)
print(showcase2)