Set appropriate calculations for speech sensor.
parent 75b054d358
commit 4d0497a5e0
@@ -248,13 +248,12 @@ PHONE_ESM:
       FEATURES: [mean]
       SRC_SCRIPT: src/features/phone_esm/straw/main.py

 # Custom sensor
 PHONE_SPEECH:
   CONTAINER: speech
   PROVIDERS:
     STRAW:
       COMPUTE: True
-      FEATURES: ["countscans"]
+      FEATURES: ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
       SRC_SCRIPT: src/features/phone_speech/straw/main.py

 # See https://www.rapids.science/latest/features/phone-keyboard/
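The FEATURES list above is only a request: the provider script keeps the intersection of the requested names with the names it knows how to compute, so a misspelled or stale entry is silently dropped. A minimal sketch of that filtering step follows; the provider dict simply mirrors the YAML and is illustrative, not part of the commit.

# Illustrative only: mimics how the STRAW provider section reaches the script.
provider = {
    "COMPUTE": True,
    "FEATURES": ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech", "countscans"],
}
base_features_names = ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]

# Only the overlap survives; "countscans" (the old feature name) is ignored.
features_to_compute = list(set(provider["FEATURES"]) & set(base_features_names))
print(sorted(features_to_compute))
# -> ['meanspeech', 'medianspeech', 'nlargest', 'nsmallest', 'stdspeech']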
@@ -1,67 +1,30 @@
 import pandas as pd

-# TODO: adjust features files
-QUESTIONNAIRE_IDS = {
-    "sleep_quality": 1,
-    "PANAS_positive_affect": 8,
-    "PANAS_negative_affect": 9,
-    "JCQ_job_demand": 10,
-    "JCQ_job_control": 11,
-    "JCQ_supervisor_support": 12,
-    "JCQ_coworker_support": 13,
-    "PFITS_supervisor": 14,
-    "PFITS_coworkers": 15,
-    "UWES_vigor": 16,
-    "UWES_dedication": 17,
-    "UWES_absorption": 18,
-    "COPE_active": 19,
-    "COPE_support": 20,
-    "COPE_emotions": 21,
-    "balance_life_work": 22,
-    "balance_work_life": 23,
-    "recovery_experience_detachment": 24,
-    "recovery_experience_relaxation": 25,
-    "symptoms": 26,
-    "appraisal_stressfulness_event": 87,
-    "appraisal_threat": 88,
-    "appraisal_challenge": 89,
-    "appraisal_event_time": 90,
-    "appraisal_event_duration": 91,
-    "appraisal_event_work_related": 92,
-    "appraisal_stressfulness_period": 93,
-    "late_work": 94,
-    "work_hours": 95,
-    "left_work": 96,
-    "activities": 97,
-    "coffee_breaks": 98,
-    "at_work_yet": 99,
-}
-

 def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-    esm_data = pd.read_csv(sensor_data_files["sensor_data"])
+    speech_data = pd.read_csv(sensor_data_files["sensor_data"])
     requested_features = provider["FEATURES"]
     # name of the features this function can compute
-    requested_scales = provider["SCALES"]
-    base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
-                           "appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
-    #TODO Check valid questionnaire and feature names.
+    base_features_names = ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
     # the subset of requested features this function can compute
     features_to_compute = list(set(requested_features) & set(base_features_names))
-    esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
-    if not esm_data.empty:
-        esm_data = filter_data_by_segment(esm_data, time_segment)
+    speech_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
+    if not speech_data.empty:
+        speech_data = filter_data_by_segment(speech_data, time_segment)

-        if not esm_data.empty:
-            esm_features = pd.DataFrame()
-            for scale in requested_scales:
-                questionnaire_id = QUESTIONNAIRE_IDS[scale]
-                mask = esm_data["questionnaire_id"] == questionnaire_id
-                esm_features[scale + "_mean"] = esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean()
-                #TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.
-
-    esm_features = esm_features.reset_index()
-    if 'index' in esm_features: # In calse of empty esm_features df
-        esm_features.rename(columns={'index': 'local_segment'}, inplace=True)
-
-    return esm_features
+        if not speech_data.empty:
+            speech_features = pd.DataFrame()
+            if "meanspeech" in features_to_compute:
+                speech_features["meanspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].mean()
+            if "stdspeech" in features_to_compute:
+                speech_features["stdspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].std()
+            if "nlargest" in features_to_compute:
+                speech_features["nlargest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nlargest(5).mean())
+            if "nsmallest" in features_to_compute:
+                speech_features["nsmallest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nsmallest(5).mean())
+            if "medianspeech" in features_to_compute:
+                speech_features["medianspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].median()
+
+            speech_features = speech_features.reset_index()
+
+    return speech_features
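With the rewritten straw_features() from the hunk above in scope, a rough usage sketch on toy data: the speech_proportion and local_segment column names come from the diff, while the CSV path, segment labels, and the pass-through stand-in for the RAPIDS filter_data_by_segment helper are assumptions for illustration.

import pandas as pd

def filter_data_by_segment(data, time_segment):
    # Stand-in for the RAPIDS helper: assume rows already carry a local_segment label.
    return data

# Toy input with the two columns the new feature code expects.
pd.DataFrame({
    "local_segment": ["daily#2020-10-01"] * 4 + ["daily#2020-10-02"] * 4,
    "speech_proportion": [0.10, 0.25, 0.40, 0.05, 0.60, 0.30, 0.20, 0.50],
}).to_csv("speech_toy.csv", index=False)

features = straw_features(
    {"sensor_data": "speech_toy.csv"},                          # sensor_data_files
    "daily",                                                    # time_segment (unused by the stand-in)
    {"FEATURES": ["meanspeech", "stdspeech", "medianspeech"]},  # provider, as in config.yaml
    filter_data_by_segment,
)
print(features)  # one row per local_segment with the requested feature columns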