71 lines
3.3 KiB
Python
71 lines
3.3 KiB
Python
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
from sklearn.preprocessing import StandardScaler
|
||
|
import sys
|
||
|
|
||
|
sys.path.append('/rapids/')
|
||
|
from src.features import cr_features_helper_methods as crhm
|
||
|
|
||
|
# Print every column of a DataFrame instead of eliding the middle ones.
pd.set_option("display.max_columns", None)

# Window-level temperature features read from the p031 interim data directory.
# Only the first six CSV columns (positions 0-5) are loaded.
windows_csv_path = "data/interim/p031/empatica_temperature_features/empatica_temperature_python_cr_windows.csv"
features_win = pd.read_csv(windows_csv_path, usecols=list(range(6)))
|
||
|
|
||
|
# First standardization method: z-score the window-level feature columns
# first, then extract the second-order (SO) features from the scaled windows.
excluded_columns = [
    'local_segment',
    'local_segment_label',
    'local_segment_start_datetime',
    'local_segment_end_datetime',
    "empatica_temperature_cr_level_1",
]

# Work on a copy so features_win keeps its unscaled values.
z1_windows = features_win.copy()

# Boolean mask selecting every column that is NOT listed in excluded_columns;
# only those columns are overwritten with their standardized values.
is_feature_column = ~z1_windows.columns.isin(excluded_columns)
scaled_values = StandardScaler().fit_transform(z1_windows.loc[:, is_feature_column])
z1_windows.loc[:, is_feature_column] = scaled_values

second_order_stats = ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows']
z1 = crhm.extract_second_order_features(
    z1_windows,
    second_order_stats,
    prefix="empatica_temperature_cr_",
)

# Keep only the columns from position 4 onward.
# NOTE(review): presumably the leading four are segment identifier columns -
# confirm against crhm.extract_second_order_features.
z1 = z1.iloc[:, 4:]
|
||
|
|
||
|
# Second standardization method: extract the second-order (SO) features from
# the unscaled windows first, then z-score the resulting SO features.
so_features_reg = crhm.extract_second_order_features(
    features_win,
    ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows'],
    prefix="empatica_temperature_cr_",
)

# Keep only the columns from position 4 onward.
# NOTE(review): presumably the leading four are segment identifier columns -
# confirm against crhm.extract_second_order_features.
so_features_reg = so_features_reg.iloc[:, 4:]

# fit_transform returns a bare ndarray, so the labels have to be reattached.
# The row index is restored as well as the column names: pandas aligns on the
# index when a Series is assigned into another frame, so a silently reset
# index could pair values with the wrong rows (or produce NaN).
z2 = pd.DataFrame(
    StandardScaler().fit_transform(so_features_reg),
    columns=so_features_reg.columns,
    index=so_features_reg.index,
)
|
||
|
|
||
|
# Standardization of the first method's output: z-score the SO features that
# were themselves extracted from already standardized windows.
#
# fit_transform returns a bare ndarray, so the labels have to be reattached.
# The row index of z1 is restored together with its column names: pandas
# aligns on the index when a Series is assigned into another frame, so a
# silently reset index could pair values with the wrong rows (or produce NaN).
z1_z = pd.DataFrame(
    StandardScaler().fit_transform(z1),
    columns=z1.columns,
    index=z1.index,
)
|
||
|
|
||
|
def _plot_three_panels(panels, figure_title, output_name):
    """Draw three vertically stacked line plots and save them as one figure.

    Args:
        panels: Three ``(title, series)`` pairs, drawn top to bottom.
        figure_title: Text used for the figure-level title.
        output_name: File name handed to ``Figure.savefig``.

    Returns:
        The ``(fig, axs)`` pair created by ``plt.subplots``.
    """
    fig, axs = plt.subplots(3, figsize=(8, 10))
    for ax, (title, series) in zip(axs, panels):
        ax.plot(series)
        ax.set_title(title)
    fig.suptitle(figure_title)
    # Save through the figure handle rather than the implicit "current figure".
    fig.savefig(output_name, bbox_inches='tight')
    return fig, axs


def _collect_columns(labelled_series):
    """Build a DataFrame holding one column per ``(label, series)`` pair.

    Columns are assigned one at a time, so every later series is aligned to
    the index established by the first one (the same semantics as repeated
    ``frame[label] = series`` statements).
    """
    frame = pd.DataFrame()
    for label, series in labelled_series:
        frame[label] = series
    return frame


# For SD
sd_column = 'empatica_temperature_cr_squareSumOfComponent_X_SO_sd'
fig, axs = _plot_three_panels(
    [
        # "Z1 - standardized windows, then extraction of SO features"
        ("Z1 - standardizirana okna, nato ekstrahiranje značilk SO", z1[sd_column]),
        # "Z2 - SO features extracted from 'normal' values, then standardization"
        ("Z2 - ekstrahirane značilke SO 'normalnih' vrednosti, nato standardizacija", z2[sd_column]),
        # "Standardized Z1"
        ("Standardiziran Z1", z1_z[sd_column]),
    ],
    'Z-Score methods for temperature_squareSumOfComponent_SO_sd',
    'z_score_comparison_temperature_squareSumOfComponent_X_SO_sd',
)

showcase = _collect_columns(
    [
        ('Z1__SD', z1[sd_column]),
        ('Z2__SD', z2[sd_column]),
        ('Z1__SD_STANDARDIZED', z1_z[sd_column]),
    ]
)
print(showcase)

# For nlargest
nlargest_column = 'empatica_temperature_cr_squareSumOfComponent_X_SO_nlargest'
fig, axs = _plot_three_panels(
    [
        # "Z1 - standardized windows, then extraction of SO features"
        ("Z1 - standardizirana okna, nato ekstrahiranje značilk SO", z1[nlargest_column]),
        ("Z2", z2[nlargest_column]),
        ("Standardized Z1", z1_z[nlargest_column]),
    ],
    'Z-Score methods for temperature_squareSumOfComponent_SO_nlargest',
    'z_score_comparison_temperature_squareSumOfComponent_X_SO_nlargest',
)

showcase2 = _collect_columns(
    [
        ('Z1__nlargest', z1[nlargest_column]),
        ('Z2__nlargest', z2[nlargest_column]),
        ('Z1__nlargest_STANDARDIZED', z1_z[nlargest_column]),
    ]
)
print(showcase2)
|
||
|
|