import sys
import warnings

import pandas as pd

# Project-local helper lives next to this provider; make it importable when the
# pipeline is run from the repository root.
sys.path.append('src/features/phone_esm/straw')
from esm_activities import esm_activities_LTM_features, process_answers_aggregation

# Mapping from ESM scale name to the numeric questionnaire_id used in the
# cleaned ESM data (phone_esm_clean.csv).
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS_positive_affect": 8,
    "PANAS_negative_affect": 9,
    "JCQ_job_demand": 10,
    "JCQ_job_control": 11,
    "JCQ_supervisor_support": 12,
    "JCQ_coworker_support": 13,
    "PFITS_supervisor": 14,
    "PFITS_coworkers": 15,
    "UWES_vigor": 16,
    "UWES_dedication": 17,
    "UWES_absorption": 18,
    "COPE_active": 19,
    "COPE_support": 20,
    "COPE_emotions": 21,
    "balance_life_work": 22,
    "balance_work_life": 23,
    "recovery_experience_detachment": 24,
    "recovery_experience_relaxation": 25,
    "symptoms": 26,
    "appraisal_stressfulness_event": 87,
    "appraisal_threat": 88,
    "appraisal_challenge": 89,
    "appraisal_event_time": 90,
    "appraisal_event_duration": 91,
    "appraisal_event_work_related": 92,
    "appraisal_stressfulness_period": 93,
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}


def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute ESM questionnaire features for one time segment.

    Parameters
    ----------
    sensor_data_files : dict
        Maps ``"sensor_data"`` to the path of the cleaned ESM csv file.
    time_segment : str
        Label of the time segment to restrict the data to.
    provider : dict
        Provider configuration; ``"FEATURES"`` lists the requested feature
        names and ``"SCALES"`` the requested questionnaire scales.
    filter_data_by_segment : callable
        Restricts a dataframe to rows belonging to ``time_segment`` and adds
        a ``"local_segment"`` column.

    Returns
    -------
    pandas.DataFrame
        One row per local segment with the requested feature columns
        (empty, with only the declared columns, when no data matches).
    """
    esm_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    requested_scales = provider["SCALES"]

    # Names of the features this function can compute. "mean" is listed so the
    # per-scale mean guard below can ever trigger (it is intersected against
    # the requested features first).
    base_features_names = ["mean", "PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand",
                           "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
                           "appraisal_stressfulness_period", "appraisal_stressfulness_event",
                           "appraisal_threat", "appraisal_challenge",
                           "activities_n_others", "activities_inperson", "activities_formal"]
    # TODO Check valid questionnaire and feature names.

    # The subset of requested features this function can compute.
    features_to_compute = list(set(requested_features) & set(base_features_names))

    esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
    if not esm_data.empty:
        esm_data = filter_data_by_segment(esm_data, time_segment)

        if not esm_data.empty:
            esm_features = pd.DataFrame()
            for scale in requested_scales:
                questionnaire_id = QUESTIONNAIRE_IDS[scale]
                mask = esm_data["questionnaire_id"] == questionnaire_id
                if not mask.any():
                    temp = sensor_data_files["sensor_data"]
                    warnings.warn(f"Warning........... No relevant questions for scale {scale} in {temp}-{time_segment}", RuntimeWarning)
                    continue

                # TODO: calculation of LTM features
                if scale == "activities":
                    # Only the activity-derived columns that were actually requested.
                    requested_subset = [req for req in requested_features if req.startswith("activities")]
                    if not requested_subset:
                        continue
                    # Aggregate the per-answer activity annotations within each segment.
                    ltm_features = esm_data.loc[mask].groupby(["local_segment"]).apply(process_answers_aggregation)
                    ltm_features.rename(columns={"n_others": "activities_n_others",
                                                 "inperson": "activities_inperson",
                                                 "formal": "activities_formal"},
                                        inplace=True)
                    esm_features[requested_subset] = ltm_features.groupby("local_segment").first()[requested_subset]
                    # FIXME: it might be an issue that im calculating for whole time segment and not grouping by "local segment"

                if "mean" in features_to_compute:
                    esm_features[scale + "_mean"] = esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean()
                    # TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.

    esm_features = esm_features.reset_index()
    if 'index' in esm_features:  # In case of an empty esm_features df.
        esm_features.rename(columns={'index': 'local_segment'}, inplace=True)

    return esm_features


def test_main():
    """Ad-hoc manual check: compute activity features for one participant."""
    import temp_help
    provider = {
        "FEATURES": ["mean", "activities_n_others", "activities_inperson", "activities_formal"],
        "SCALES": ['activities']
    }
    sensor_data_files = {"sensor_data": "data/interim/p069/phone_esm_clean.csv"}
    s_feat = straw_features(sensor_data_files, "straw_event_stress_event_p069_110", provider, temp_help.filter_data_by_segment)
    print(s_feat)


#test_main()