not working temp

2023-03-30 11:54:51 +00:00 · 2023-03-30 11:54:51 +00:00 · e7bb9d6702
parent 689f677a3e
commit e7bb9d6702
5 changed files with 362 additions and 32 deletions
--- a/config.yaml
+++ b/config.yaml
@ -26,7 +26,7 @@ TIME_SEGMENTS: &time_segments
  INCLUDE_PAST_PERIODIC_SEGMENTS: TRUE # Only relevant if TYPE=PERIODIC, see docs
  TAILORED_EVENTS: # Only relevant if TYPE=EVENT
    COMPUTE: True
-    SEGMENTING_METHOD: "stress_event" # 30_before, 90_before, stress_event
+    SEGMENTING_METHOD: "10_before" # 30_before, 90_before, stress_event
    INTERVAL_OF_INTEREST: 10 # duration of event of interest [minutes]
    IOI_ERROR_TOLERANCE: 5 # interval of interest error tolerance (before and after IOI) [minutes]
@ -91,7 +91,7 @@ PHONE_ACTIVITY_RECOGNITION:
  EPISODE_THRESHOLD_BETWEEN_ROWS: 5 # minutes. Max time difference for two consecutive rows to be considered within the same AR episode.
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
      ACTIVITY_CLASSES:
        STATIONARY: ["still", "tilting"]
@ -120,7 +120,7 @@ PHONE_APPLICATIONS_FOREGROUND:
    SCRAPE_MISSING_CATEGORIES: False # whether or not to scrape missing genres, only effective if CATALOGUE_SOURCE is equal to FILE. If CATALOGUE_SOURCE is equal to GOOGLE, all genres are scraped anyway
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      INCLUDE_EPISODE_FEATURES: True
      SINGLE_CATEGORIES: ["all", "email"]
      MULTIPLE_CATEGORIES:
@ -155,7 +155,7 @@ PHONE_BATTERY:
  EPISODE_THRESHOLD_BETWEEN_ROWS: 30 # minutes. Max time difference for two consecutive rows to be considered within the same battery episode.
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
      SRC_SCRIPT: src/features/phone_battery/rapids/main.py
@ -169,7 +169,7 @@ PHONE_BLUETOOTH:
      SRC_SCRIPT: src/features/phone_bluetooth/rapids/main.R
    DORYAB:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: 
        ALL: 
            DEVICES: ["countscans", "uniquedevices", "meanscans", "stdscans"]
@ -190,7 +190,7 @@ PHONE_CALLS:
  CONTAINER: call
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES_TYPE: EPISODES # EVENTS or EPISODES
      CALL_TYPES: [missed, incoming, outgoing]
      FEATURES:
@ -233,7 +233,7 @@ PHONE_DATA_YIELD:
            PHONE_WIFI_VISIBLE]
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
      MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1, minimum percentage of valid minutes in an hour to be considered valid.
      SRC_SCRIPT: src/features/phone_data_yield/rapids/main.R
@ -243,9 +243,8 @@ PHONE_ESM:
  PROVIDERS:
    STRAW:
      COMPUTE: True
-      SCALES: ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support", 
+      SCALES: ["activities"]
-              "appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
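+      FEATURES: [activities_n_others, activities_inperson, activities_formal] # NOTE(review): duplicate key - the unchanged "FEATURES: [mean]" line right below sits in the same mapping; YAML loaders that tolerate duplicates usually keep the last value, so this list is likely ignored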
      FEATURES: [mean]
      SRC_SCRIPT: src/features/phone_esm/straw/main.py
 # See https://www.rapids.science/latest/features/phone-keyboard/
@ -262,7 +261,7 @@ PHONE_LIGHT:
  CONTAINER: light_sensor
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
      SRC_SCRIPT: src/features/phone_light/rapids/main.py
@ -276,7 +275,7 @@ PHONE_LOCATIONS:
  PROVIDERS:
    DORYAB:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
      DBSCAN_EPS: 100 # meters
      DBSCAN_MINSAMPLES: 5
@ -291,7 +290,7 @@ PHONE_LOCATIONS:
      SRC_SCRIPT: src/features/phone_locations/doryab/main.py
    BARNETT:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["hometime","disttravelled","rog","maxdiam","maxhomedist","siglocsvisited","avgflightlen","stdflightlen","avgflightdur","stdflightdur","probpause","siglocentropy","circdnrtn","wkenddayrtn"]
      IF_MULTIPLE_TIMEZONES: USE_MOST_COMMON
      MINUTES_DATA_USED: False # Use this for quality control purposes, how many minutes of data (location coordinates grouped by minute) were used to compute features
@ -309,7 +308,7 @@ PHONE_MESSAGES:
  CONTAINER: sms
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      MESSAGES_TYPES : [received, sent]
      FEATURES: 
        received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
@ -321,7 +320,7 @@ PHONE_SCREEN:
  CONTAINER: screen
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      REFERENCE_HOUR_FIRST_USE: 0
      IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
      IGNORE_EPISODES_LONGER_THAN: 360 # in minutes, set to 0 to disable
@ -334,7 +333,7 @@ PHONE_SPEECH:
  CONTAINER: speech
  PROVIDERS:
    STRAW:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
      SRC_SCRIPT: src/features/phone_speech/straw/main.py
@ -352,7 +351,7 @@ PHONE_WIFI_VISIBLE:
  CONTAINER: wifi
  PROVIDERS:
    RAPIDS:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["countscans", "uniquedevices", "countscansmostuniquedevice"]
      SRC_SCRIPT: src/features/phone_wifi_visible/rapids/main.R
@ -521,10 +520,10 @@ EMPATICA_ACCELEROMETER:
      FEATURES: ["maxmagnitude", "minmagnitude", "avgmagnitude", "medianmagnitude", "stdmagnitude"]
      SRC_SCRIPT: src/features/empatica_accelerometer/dbdp/main.py
    CR:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["totalMagnitudeBand", "absoluteMeanBand", "varianceBand"] # Acc features
      WINDOWS:
-        COMPUTE: True
+        COMPUTE: False
        WINDOW_LENGTH: 15 # specify window length in seconds
        SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows']
      SRC_SCRIPT: src/features/empatica_accelerometer/cr/main.py
@ -548,11 +547,11 @@ EMPATICA_TEMPERATURE:
      FEATURES: ["maxtemp", "mintemp", "avgtemp", "mediantemp", "modetemp", "stdtemp", "diffmaxmodetemp", "diffminmodetemp", "entropytemp"]
      SRC_SCRIPT: src/features/empatica_temperature/dbdp/main.py
    CR:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ["maximum", "minimum", "meanAbsChange", "longestStrikeAboveMean", "longestStrikeBelowMean", 
                  "stdDev", "median", "meanChange", "sumSquared", "squareSumOfComponent", "sumOfSquareComponents"]
      WINDOWS:
-        COMPUTE: True
+        COMPUTE: False
        WINDOW_LENGTH: 300 # specify window length in seconds
        SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows']
      SRC_SCRIPT: src/features/empatica_temperature/cr/main.py
@ -566,14 +565,14 @@ EMPATICA_ELECTRODERMAL_ACTIVITY:
      FEATURES: ["maxeda", "mineda", "avgeda", "medianeda", "modeeda", "stdeda", "diffmaxmodeeda", "diffminmodeeda", "entropyeda"]
      SRC_SCRIPT: src/features/empatica_electrodermal_activity/dbdp/main.py
    CR:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ['mean', 'std', 'q25', 'q75', 'qd', 'deriv', 'power', 'numPeaks', 'ratePeaks', 'powerPeaks', 'sumPosDeriv', 'propPosDeriv', 'derivTonic', 
                  'sigTonicDifference', 'freqFeats','maxPeakAmplitudeChangeBefore', 'maxPeakAmplitudeChangeAfter', 'avgPeakAmplitudeChangeBefore', 
                  'avgPeakAmplitudeChangeAfter', 'avgPeakChangeRatio', 'maxPeakIncreaseTime', 'maxPeakDecreaseTime', 'maxPeakDuration', 'maxPeakChangeRatio',
                  'avgPeakIncreaseTime', 'avgPeakDecreaseTime', 'avgPeakDuration', 'signalOverallChange', 'changeDuration', 'changeRate', 'significantIncrease', 
                  'significantDecrease']
      WINDOWS:
-        COMPUTE: True
+        COMPUTE: False
        WINDOW_LENGTH: 60 # specify window length in seconds
        SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', count_windows, eda_num_peaks_non_zero]
        IMPUTE_NANS: True
@ -592,7 +591,7 @@ EMPATICA_BLOOD_VOLUME_PULSE:
      FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
                  'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features
      WINDOWS:
-        COMPUTE: True
+        COMPUTE: False
        WINDOW_LENGTH: 300 # specify window length in seconds
        SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows', 'hrv_num_windows_non_nan']
      SRC_SCRIPT: src/features/empatica_blood_volume_pulse/cr/main.py
@ -606,12 +605,12 @@ EMPATICA_INTER_BEAT_INTERVAL:
      FEATURES: ["maxibi", "minibi", "avgibi", "medianibi", "modeibi", "stdibi", "diffmaxmodeibi", "diffminmodeibi", "entropyibi"]
      SRC_SCRIPT: src/features/empatica_inter_beat_interval/dbdp/main.py
    CR:
-      COMPUTE: True
+      COMPUTE: False
      FEATURES: ['meanHr', 'ibi', 'sdnn', 'sdsd', 'rmssd', 'pnn20', 'pnn50', 'sd', 'sd2', 'sd1/sd2', 'numRR', # Time features
                  'VLF', 'LF', 'LFnorm', 'HF', 'HFnorm', 'LF/HF', 'fullIntegral'] # Freq features            
      PATCH_WITH_BVP: True
      WINDOWS:
-        COMPUTE: True
+        COMPUTE: False
        WINDOW_LENGTH: 300 # specify window length in seconds
        SECOND_ORDER_FEATURES: ['mean', 'median', 'sd', 'nlargest', 'nsmallest', 'count_windows', 'hrv_num_windows_non_nan']
      SRC_SCRIPT: src/features/empatica_inter_beat_interval/cr/main.py
@ -732,7 +731,7 @@ ALL_CLEANING_OVERALL:
 PARAMS_FOR_ANALYSIS:
  BASELINE:
-    COMPUTE: True
+    COMPUTE: False
    FOLDER: data/external/baseline
    CONTAINER: [results-survey637813_final.csv,  # Slovenia
                results-survey358134_final.csv,  # Belgium 1
@ -743,7 +742,7 @@ PARAMS_FOR_ANALYSIS:
    CATEGORICAL_FEATURES: [gender]
  TARGET:
-    COMPUTE: True
+    COMPUTE: False
    LABEL: appraisal_stressfulness_event_mean
    ALL_LABELS: [appraisal_stressfulness_event_mean, appraisal_threat_mean, appraisal_challenge_mean]
                # PANAS_positive_affect_mean, PANAS_negative_affect_mean, JCQ_job_demand_mean, JCQ_job_control_mean, JCQ_supervisor_support_mean, 
--- a/src/features/phone_esm/straw/esm_activities.py
+++ b/src/features/phone_esm/straw/esm_activities.py
@ -0,0 +1,288 @@
 import pandas as pd
 import numpy as np
 # Question id -> the known wordings of that question. Each entry lists the
 # English, Dutch and Slovenian text of the same ESM question, in that order.
 # Id 44 is the chain-opening "last 10 minutes" question; 45-51 are its sub-questions.
 id2qc = {  44:["What have you mainly been doing within the last 10 minutes?",
                            "Waar ben je voornamelijk mee bezig geweest de laatste 10 minuten?",
                            "Kaj ste v glavnem počeli v zadnjih 10 minutah?"],
                        45:["What type of individual work?",
                            "Wat voor soort individueel werk?",
                            "Kakšno vrsto samostojnega dela ste opravljali?"],
                        46:["How did you work with others?",
                            "Hoe heb je met anderen gewerkt?",
                            "Kako ste sodelovali z drugimi?"],
                        47:["What type of break?",
                            "Wat voor soort pauze?",
                            "Kakšno vrsto odmora ste imeli?"],
                        48:["Where did you travel between?",
                            "Waar heb je tussen gereisd?",
                            "Kam ste potovali?"],
                        49:["Did you use a computer or phone for that?",
                            "Heb je daarvoor een computer of telefoon gebruikt?",
                            "Ste za to uporabljali računalnik ali telefon?"],
                        50:["What kind of an interaction was that?",
                            "Wat voor interactie was dat?",
                            "Kakšne vrste sodelovanja je bilo to?"],
                        51:["How many people were involved besides yourself?",
                            "Hoeveel mensen waren er behalve jezelf betrokken?",
                            "Koliko oseb je bilo poleg vas še vpletenih?"],
                        # 52:["What have you mainly been doing within the last 10 minutes?",
                        #     "Waar ben je voornamelijk mee bezig geweest de laatste 10 minuten?",
                        #     "Kaj ste v glavnem počeli v zadnjih 10 minutah?"]
 }
 # Inverse lookup: every question wording (in any of the listed languages) -> its question id.
 qc2id = {v:k for k,values in id2qc.items() for v in values}
 # Question id -> ids of the questions that can follow it; an empty list ends the chain.
 # NOTE(review): the list position presumably corresponds to the numeric answer option
 # (e.g. answer 0 to question 44 leads to question 45) - confirm against the questionnaire.
 # This table is not referenced by the functions visible in this module.
 next_questions = {  44: [45,46,47,48],
                    45:[49,49],
                    46:[50,50],
                    47:[],
                    48:[],
                    49:[],
                    50:[51,51],
                    51:[]
                    #52:[45,46,47,48],
                 }
 def esm_activities_LTM_features(
    df_esm_activities_cleaned: pd.DataFrame,
 ) -> pd.DataFrame:
    """ Function for calculating LTM (Last 10 minutes) features of questionnaire answers.

    It first corrects the question ids according to esm_instructions and the corpus of
    question ids in id2qc. Rows are then grouped into question chains: a chain starts at
    every main question (id 44, "What have you mainly been doing within the last 10 minutes?")
    and runs until the next one. The numeric answers of each chain are passed to
    process_answers() to find the social properties given by the answers, such as the number
    of people interacted with, the formality and whether the socializing was done in person.

    Rows that appear before the first main question do not belong to any chain; they are
    labelled with q_group == -1 and are ignored when the answer sequences are built.

    Parameters
    ----------
    df_esm_activities_cleaned: pd.DataFrame
        A cleaned up dataframe, which must include esm_instructions, esm_user_answer_numeric.
        The columns "correct_ids" and "q_group" are added to this dataframe in place.

    Returns
    -------
    pd.DataFrame
        The input dataframe joined (on its index) with:
            ["correct_ids"] - Corrected question_ids
            ["q_group"] - Running number of the question chain the row belongs to (-1: none)
            ["n_others","inperson","formal"] - Properties of known potential social encounters
                as given by process_answers(). Only the rows of main questions carry values.
    """
    main_question_id = 44  # opens every LTM question chain, see id2qc

    # correct_activity_qids() adds "correct_ids" to the dataframe it is given and returns it.
    correct_id_df = correct_activity_qids(df_esm_activities_cleaned)
    ids = correct_id_df["correct_ids"]
    is_main_question = ids == main_question_id
    main_q_indices = ids[is_main_question].index

    # Running chain number: -1 before the first main question, then 0, 1, 2, ...
    correct_id_df["q_group"] = is_main_question.cumsum() - 1

    # Collect the answers of every chain. Rows with q_group == -1 are left out so that
    # there is exactly one answer sequence per main question.
    belongs_to_chain = correct_id_df["q_group"] >= 0
    answers_by_chain = correct_id_df.loc[belongs_to_chain].groupby("q_group")["esm_user_answer_numeric"]
    ans_seq = pd.Series(
        [list(answers) for _, answers in answers_by_chain],
        index=main_q_indices,
        dtype=object,
    ).to_frame("ans_seq")

    # Status of each main question: socializing and number of people involved.
    processed_ans_df = process_answers(ans_seq)
    return df_esm_activities_cleaned.join(processed_ans_df)
 """ 
 possible answer sequences for LTM question chains
 #alone
 0,0,0 not social
 0,0,1 not social
 0,1,0 not social
 0,1,1 not social
 0,2 not social
 0,3 not social
 0,4 not social
 0,5 not social
 0,6 not social
 #w/ others
 1,0,0,0 1 irl
 1,0,0,1 2 irl
 1,0,0,2 3+ irl
 1,0,1,0 1 irl
 1,0,1,1 2 irl
 1,0,1,2 3+ irl
 1,1,0,0 1 online 
 1,1,0,1 2 online 
 1,1,0,2 3+ online 
 1,1,1,0 1 online 
 1,1,1,1 2 online 
 1,1,1,2 3+ online 
 1,2 positive likely to be more than 2
 1,3 positive
 #break
 2,0 ambiguous
 2,1 positive irl
 2,2 ambiguous
 2,3 ambiguous
 #transit
 3,0 ambiguous
 3,1 ambiguous
 3,2 ambiguous
 """
 #TODO: docstring
 def _ltm_chain_properties(ans_seq):
    """Derive (n_others, inperson, formal) from the answers of one LTM question chain.

    Answers that are missing because the chain is empty or was cut short are treated as
    unknown: every property that depends on them stays None instead of raising IndexError.
    """
    def answer(position):
        # None marks a question of the chain that was never answered.
        return ans_seq[position] if position < len(ans_seq) else None

    n_other = None
    inperson = None
    formal = None
    activity = answer(0)
    detail = answer(1)
    if activity == 0:
        n_other = 0  # worked alone
    elif activity == 1:
        if detail == 3:
            n_other = -1  # answered "Other" but did work with other people
        elif detail == 2:
            n_other = 3  # assuming more than 2 people participated in the lecture or presentation
        elif detail in (0, 1):
            inperson = detail == 0  # 0 means irl interaction, 1 means online or phone
            formality = answer(2)
            if formality is not None:
                formal = formality == 0  # 0 means formal
            people = answer(3)
            if people is not None:
                n_other = people + 1  # the answer is on [0,2] so we add 1 to make it [1,3]
    elif activity == 2:
        formal = False  # assuming one does not have a formal meeting during break time
        if detail == 1:
            n_other = -1
            inperson = True
        # if not 1 then we don't know anything for sure
    # activity == 3 (transit): we can't say whether the person was carpooling or driving alone.
    return n_other, inperson, formal


 def process_answers(df:pd.DataFrame)-> pd.DataFrame:
    """ Function to process answer sequences for LTM question chains. It checks the chain of subquestion answers and extracts the following attributes:
        > n_others: Number of other people interacted with in the last 10 minutes
            - -1: Number is positive but unknown exactly
            - 0: No people/alone
            - 1: One extra person
            - 2: Two extra people
            - 3: More than two extra people
            - NaN : Can't say anything with enough certainty.
        > inperson: 
            - True/False: The interaction in question was/wasn't in person.
            - None: Can't say anything with enough certainty.
        > formal: 
            - True/False: The interaction in question was/wasn't formal.
            - None: Can't say anything with enough certainty.

    Empty or truncated answer sequences do not raise; the properties that cannot be
    derived from the available answers are left as unknown.

    Args:
        df (pd.DataFrame): Dataframe with a column "ans_seq" that holds, per row, the
            sequence of numeric answers of one question chain (main question first).
    Returns:
        pd.DataFrame: Columns "n_others", "inperson" and "formal", one row per row of
            the input and with the same index.
    """    
    rows = [_ltm_chain_properties(ans_seq) for ans_seq in df["ans_seq"]]
    properties = {"n_others": [row[0] for row in rows],
                  "inperson": [row[1] for row in rows],
                  "formal": [row[2] for row in rows]}
    return pd.DataFrame(properties,index=df.index)
 def correct_activity_qids(df:pd.DataFrame)->pd.DataFrame:
    """Attach the question id of every row as column "correct_ids".

    The id is looked up in qc2id by the row's "esm_instructions" text, so a text
    that is not a key of qc2id raises KeyError.

    Args:
        df (pd.DataFrame): Dataframe with an "esm_instructions" column. It is modified in place.
    Returns:
        pd.DataFrame: Input dataframe with added column "correct_ids"
    """
    lookup_id = qc2id.__getitem__
    instructions = df["esm_instructions"]
    df["correct_ids"] = instructions.apply(lookup_id)
    return df
 def process_answers_aggregation(df:pd.DataFrame)-> pd.DataFrame:
    """ Function to process the answer sequence of a single LTM question chain. It checks the chain of subquestion answers and extracts the following attributes:
        > n_others: Number of other people interacted with in the last 10 minutes
            - -1: Number is positive but unknown exactly
            - 0: No people/alone
            - 1: One extra person
            - 2: Two extra people
            - 3: More than two extra people
            - NaN : Can't say anything with enough certainty.
        > inperson: 
            - True/False: The interaction in question was/wasn't in person.
            - None: Can't say anything with enough certainty.
        > formal: 
            - True/False: The interaction in question was/wasn't formal.
            - None: Can't say anything with enough certainty.

    The whole input is treated as one chain, with the answer to the main question in the
    first row. Truncated chains do not raise; properties that cannot be derived from the
    available answers are left as unknown.

    Args:
        df (pd.DataFrame): Rows of one question chain with a column "esm_user_answer_numeric".
    Returns:
        pd.DataFrame: A single row with the columns "n_others", "inperson" and "formal",
            labelled with the index of the first row of df (an empty frame if df is empty).
            NOTE(review): when called through groupby(...).apply() pandas is expected to
            prefix the group key, giving a (group key, first row label) index - confirm
            that this is what the caller wants.
    """    
    columns = ["n_others", "inperson", "formal"]
    if len(df) == 0:
        # Nothing was answered, so there is no chain to describe.
        return pd.DataFrame(columns=columns, index=df.index[:0])

    # Pad the chain to its maximum length of four answers; None marks a missing answer.
    answers = list(df["esm_user_answer_numeric"].values[:4])
    answers += [None] * (4 - len(answers))
    activity, detail, formality, people = answers

    n_other = None
    inperson = None
    formal = None
    if activity == 0:
        n_other = 0  # worked alone
    elif activity == 1:
        if detail == 3:
            n_other = -1  # answered "Other" but did work with other people
        elif detail == 2:
            n_other = 3  # assuming more than 2 people participated in the lecture or presentation
        elif detail in (0, 1):
            inperson = detail == 0  # 0 means irl interaction, 1 means online or phone
            if formality is not None:
                formal = formality == 0  # 0 means formal
            if people is not None:
                n_other = people + 1  # the answer is on [0,2] so we add 1 to make it [1,3]
    elif activity == 2:
        formal = False  # assuming one does not have a formal meeting during break time
        if detail == 1:
            n_other = -1
            inperson = True
        # if not 1 then we don't know anything for sure
    # activity == 3 (transit): we can't say whether the person was carpooling or driving alone.

    properties = {"n_others": [n_other],
                  "inperson": [inperson],
                  "formal": [formal]}
    # One row per chain: the lists above hold a single value, so the index must be a single
    # label as well (using the full df.index fails for chains longer than one row).
    return pd.DataFrame(properties, index=df.index[:1])
 #test stuff
 def test():
    """Scratch comparison of the two LTM pipelines on the raw ESM data of participant p031.

    Loads and cleans the raw ESM file, keeps questionnaire 97, computes the properties
    once with esm_activities_LTM_features() and once by applying
    process_answers_aggregation() per local_segment, and compares the rows of the main
    questions. The comparison result is not returned or asserted.
    """
    from esm_preprocess import preprocess_esm,clean_up_esm

    raw = pd.read_csv("data/raw/p031/phone_esm_with_datetime.csv")
    cleaned = clean_up_esm(preprocess_esm(raw))
    df = cleaned[cleaned["questionnaire_id"]==97]
    original = esm_activities_LTM_features(df)

    # One segment label per (session, device) pair.
    df["local_segment"] = [
        str(session)+":"+device
        for session,device in df[["esm_session","device_id"]].values
    ]
    per_segment = df.groupby("local_segment").apply(process_answers_aggregation)

    #compare with original function results
    is_main_question = original["correct_ids"]==44
    selection = original[is_main_question][["n_others",  "inperson", "formal"]]
    per_segment.loc[selection.index].compare(selection,keep_shape=True,keep_equal =True)
    #print out ans_seq processing results
    # import json
    # i = 0
    # for j,ans in correct_id_df[["esm_json","esm_user_answer"]].values:
    #     obj = json.loads(j)
    #     text = obj["esm_instructions"]
    #     if ("10 minut" in text):
    #         print("---\n",test.ans_seq.iloc[i])
    #         print(test[["n_others","inperson","formal"]].values[i])
    #         i = i+1
    #     print(text,ans)
 #test()
--- a/src/features/phone_esm/straw/main.py
+++ b/src/features/phone_esm/straw/main.py
@ -1,4 +1,8 @@
 import pandas as pd
 import sys
 import warnings
 sys.path.append('src/features/phone_esm/straw')   
 from esm_activities import esm_activities_LTM_features,process_answers_aggregation
 QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
@ -39,23 +43,49 @@ QUESTIONNAIRE_IDS = {
 def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    esm_data = pd.read_csv(sensor_data_files["sensor_data"])
    requested_features = provider["FEATURES"]
    # name of the features this function can compute
    requested_scales = provider["SCALES"]
    base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support", 
-                            "appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
+                            "appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge","activities_n_others","activities_inperson","activities_formal"]
    #TODO Check valid questionnaire and feature names.
    # the subset of requested features this function can compute
    features_to_compute = list(set(requested_features) & set(base_features_names))
    esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
    if not esm_data.empty:
        # print(esm_data.head())
        # print(time_segment)
        esm_data = filter_data_by_segment(esm_data, time_segment)
        if not esm_data.empty:
            esm_features = pd.DataFrame()
            for scale in requested_scales:
                questionnaire_id = QUESTIONNAIRE_IDS[scale]
                mask = esm_data["questionnaire_id"] == questionnaire_id
                if not mask.any():
                    temp = sensor_data_files["sensor_data"]
                    warnings.warn(f"Warning........... No relevant questions for scale {scale} in {temp}",RuntimeWarning) 
                    continue
                #TODO: calculation of LTM features
                if scale=="activities":
                    requested_subset = [req[len("activities_"):] for req in requested_features if req.startswith("activities")]
                    if not bool(requested_subset):
                        continue
                    # ltm_features = esm_activities_LTM_features(esm_data.loc[mask])
                    # print(esm_data["esm_json"].values)
                    # print(mask)
                    # print(esm_data.loc[mask])
                    # print(ltm_features)
                    # #ltm_features = ltm_features[ltm_features["correct_ids"==44]]
                    print(esm_data["local_segment"])
                    if(type(esm_data["local_segment"].values[0]) != str):
                        raise Exception("wrong dtype of local_segment")
                    ltm_features = esm_data.loc[mask].groupby(["local_segment"]).apply(process_answers_aggregation)
                    print(ltm_features)
                    esm_features[["activities_"+req for req in requested_subset]] = ltm_features[requested_subset]
                    #FIXME: it might be an issue that im calculating for whole time segment and not grouping by "local segment"
                    continue
                esm_features[scale + "_mean"] = esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean()
                #TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.
--- a/src/features/phone_esm/straw/process_user_event_related_segments.py
+++ b/src/features/phone_esm/straw/process_user_event_related_segments.py
@ -67,7 +67,7 @@ def extract_ers(esm_df):
    segmenting_method = config["TIME_SEGMENTS"]["TAILORED_EVENTS"]["SEGMENTING_METHOD"]
-    if segmenting_method in ["30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
+    if segmenting_method in ["10_before", "30_before", "90_before"]: # takes 30-minute peroid before the questionnaire + the duration of the questionnaire
        """ '30-minutes and 90-minutes before' have the same fundamental logic with couple of deviations that will be explained below.
        Both take x-minute period before the questionnaire that is summed with the questionnaire duration.
        All questionnaire durations over 15 minutes are excluded from the querying.
@ -79,7 +79,18 @@ def extract_ers(esm_df):
        extracted_ers = extracted_ers[extracted_ers["timestamp"] <= 15 * 60].reset_index(drop=True) # ensure that the longest duration of the questionnaire anwsering is 15 min 
        extracted_ers["shift_direction"] = -1 
-        if segmenting_method == "30_before":
+        if segmenting_method == "10_before":
            """The method 10-minutes before simply takes 10 minutes before the questionnaire and sums it with the questionnaire duration.
            The timestamps are formatted with the help of format_timestamp() method.
            """
            time_before_questionnaire = 10 * 60 # in seconds (10 minutes)
            #TODO: split into small segments by manipulating length and shift
            extracted_ers["length"] = (extracted_ers["timestamp"] + time_before_questionnaire).apply(lambda x: format_timestamp(x))
            extracted_ers["shift"] = time_before_questionnaire
            extracted_ers["shift"] = extracted_ers["shift"].apply(lambda x: format_timestamp(x))
        elif segmenting_method == "30_before":
            """The method 30-minutes before simply takes 30 minutes before the questionnaire and sums it with the questionnaire duration.
            The timestamps are formatted with the help of format_timestamp() method.
            """
--- a/src/features/utils/utils.py
+++ b/src/features/utils/utils.py
@ -14,6 +14,7 @@ def import_path(path):
    sys.modules[module_name] = module
    return module
 #TODO:check why segments change to int 
 def filter_data_by_segment(data, time_segment):
    data.dropna(subset=["assigned_segments"], inplace=True)
    if(data.shape[0] == 0): # data is empty
@ -151,6 +152,7 @@ def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_file
    else: 
        segment_colums = pd.DataFrame()
        print(sensor_features,sensor_features['local_segment'])
        sensor_features['local_segment'] = sensor_features['local_segment'].str.replace(r'_RR\d+SS', '')
        split_segemnt_columns = sensor_features["local_segment"].str.split(pat="(.*)#(.*),(.*)", expand=True)
        new_segment_columns = split_segemnt_columns.iloc[:,1:4] if split_segemnt_columns.shape[1] == 5 else pd.DataFrame(columns=["local_segment_label", "local_segment_start_datetime","local_segment_end_datetime"])