import pandas as pd
import sys
import warnings
sys.path.append('src/features/phone_esm/straw')
from esm_activities import esm_activities_LTM_features, process_answers_aggregation

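# Mapping from scale name (the values accepted in the provider's SCALES config) to the
# questionnaire_id used in the cleaned ESM data; straw_features uses it to pick out the
# answers that belong to each requested scale.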
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS_positive_affect": 8,
    "PANAS_negative_affect": 9,
    "JCQ_job_demand": 10,
    "JCQ_job_control": 11,
    "JCQ_supervisor_support": 12,
    "JCQ_coworker_support": 13,
    "PFITS_supervisor": 14,
    "PFITS_coworkers": 15,
    "UWES_vigor": 16,
    "UWES_dedication": 17,
    "UWES_absorption": 18,
    "COPE_active": 19,
    "COPE_support": 20,
    "COPE_emotions": 21,
    "balance_life_work": 22,
    "balance_work_life": 23,
    "recovery_experience_detachment": 24,
    "recovery_experience_relaxation": 25,
    "symptoms": 26,
    "appraisal_stressfulness_event": 87,
    "appraisal_threat": 88,
    "appraisal_challenge": 89,
    "appraisal_event_time": 90,
    "appraisal_event_duration": 91,
    "appraisal_event_work_related": 92,
    "appraisal_stressfulness_period": 93,
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}

def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
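    """Compute ESM questionnaire features for one time segment.

    Summary of the inputs, based on how they are used below: sensor_data_files["sensor_data"]
    points to the cleaned ESM CSV, provider["FEATURES"] and provider["SCALES"] select what to
    compute, and filter_data_by_segment restricts the answers to the given time_segment.
    Returns a dataframe with one row per local_segment.
    """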
    esm_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    requested_scales = provider["SCALES"]

    # names of the features this function can compute
    base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control",
                           "JCQ_supervisor_support", "JCQ_coworker_support", "appraisal_stressfulness_period",
                           "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge",
                           "activities_n_others", "activities_inperson", "activities_formal"]
    # TODO: check for valid questionnaire and feature names.

    # the subset of requested features this function can compute
    features_to_compute = list(set(requested_features) & set(base_features_names))
    esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
    if not esm_data.empty:
        esm_data = filter_data_by_segment(esm_data, time_segment)
        if not esm_data.empty:
            esm_features = pd.DataFrame()
            for scale in requested_scales:
                questionnaire_id = QUESTIONNAIRE_IDS[scale]
                mask = esm_data["questionnaire_id"] == questionnaire_id
                if not mask.any():
                    warnings.warn(
                        f"No relevant questions for scale {scale} in {sensor_data_files['sensor_data']} - {time_segment}",
                        RuntimeWarning,
                    )
                    continue
                # TODO: calculation of LTM features
                if scale == "activities":
                    requested_subset = [req for req in requested_features if req.startswith("activities")]
                    if not requested_subset:
                        continue
                    # Aggregate the activity answers per local_segment; process_answers_aggregation is
                    # expected to yield the columns "n_others", "inperson" and "formal" (renamed below).
                    # ltm_features = esm_activities_LTM_features(esm_data.loc[mask])
                    ltm_features = esm_data.loc[mask].groupby(["local_segment"]).apply(process_answers_aggregation)
                    ltm_features.rename(columns={"n_others": "activities_n_others", "inperson": "activities_inperson", "formal": "activities_formal"}, inplace=True)
                    esm_features[requested_subset] = ltm_features.groupby("local_segment").first()[requested_subset]
                    # FIXME: it might be an issue that this is calculated for the whole time segment and not grouped by "local_segment".
if ( " mean " in features_to_compute ) :
esm_features [ scale + " _mean " ] = esm_data . loc [ mask ] . groupby ( [ " local_segment " ] ) [ " esm_user_score " ] . mean ( )
2022-04-05 19:05:34 +02:00
#TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.
2022-04-05 18:58:09 +02:00
esm_features = esm_features . reset_index ( )
2022-12-08 17:04:39 +01:00
if ' index ' in esm_features : # In calse of empty esm_features df
esm_features . rename ( columns = { ' index ' : ' local_segment ' } , inplace = True )
2022-04-05 15:46:02 +02:00
return esm_features
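# Local smoke test below: it assumes a cleaned ESM file for participant p069 exists and that
# temp_help (providing filter_data_by_segment) is importable; it is not run by the pipeline.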
def test_main():
    import temp_help
    provider = {
        "FEATURES": ["mean", "activities_n_others", "activities_inperson", "activities_formal"],
        "SCALES": ["activities"],
    }
    sensor_data_files = {"sensor_data": "data/interim/p069/phone_esm_clean.csv"}
    s_feat = straw_features(sensor_data_files, "straw_event_stress_event_p069_110", provider, temp_help.filter_data_by_segment)
    print(s_feat)


# test_main()