Set appropriate calculations for speech senzor.

sociality-task
Primoz 2023-01-17 14:00:42 +00:00
parent 75b054d358
commit 4d0497a5e0
2 changed files with 23 additions and 61 deletions

View File

@ -248,13 +248,12 @@ PHONE_ESM:
FEATURES: [mean] FEATURES: [mean]
SRC_SCRIPT: src/features/phone_esm/straw/main.py SRC_SCRIPT: src/features/phone_esm/straw/main.py
# Custom sensor
PHONE_SPEECH: PHONE_SPEECH:
CONTAINER: speech CONTAINER: speech
PROVIDERS: PROVIDERS:
STRAW: STRAW:
COMPUTE: True COMPUTE: True
FEATURES: ["countscans"] FEATURES: ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
SRC_SCRIPT: src/features/phone_speech/straw/main.py SRC_SCRIPT: src/features/phone_speech/straw/main.py
# See https://www.rapids.science/latest/features/phone-keyboard/ # See https://www.rapids.science/latest/features/phone-keyboard/

View File

@ -1,67 +1,30 @@
import pandas as pd import pandas as pd
# TODO: adjust features files
QUESTIONNAIRE_IDS = {
"sleep_quality": 1,
"PANAS_positive_affect": 8,
"PANAS_negative_affect": 9,
"JCQ_job_demand": 10,
"JCQ_job_control": 11,
"JCQ_supervisor_support": 12,
"JCQ_coworker_support": 13,
"PFITS_supervisor": 14,
"PFITS_coworkers": 15,
"UWES_vigor": 16,
"UWES_dedication": 17,
"UWES_absorption": 18,
"COPE_active": 19,
"COPE_support": 20,
"COPE_emotions": 21,
"balance_life_work": 22,
"balance_work_life": 23,
"recovery_experience_detachment": 24,
"recovery_experience_relaxation": 25,
"symptoms": 26,
"appraisal_stressfulness_event": 87,
"appraisal_threat": 88,
"appraisal_challenge": 89,
"appraisal_event_time": 90,
"appraisal_event_duration": 91,
"appraisal_event_work_related": 92,
"appraisal_stressfulness_period": 93,
"late_work": 94,
"work_hours": 95,
"left_work": 96,
"activities": 97,
"coffee_breaks": 98,
"at_work_yet": 99,
}
def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
esm_data = pd.read_csv(sensor_data_files["sensor_data"]) speech_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_features = provider["FEATURES"] requested_features = provider["FEATURES"]
# name of the features this function can compute # name of the features this function can compute+
requested_scales = provider["SCALES"] base_features_names = ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
base_features_names = ["PANAS_positive_affect", "PANAS_negative_affect", "JCQ_job_demand", "JCQ_job_control", "JCQ_supervisor_support", "JCQ_coworker_support",
"appraisal_stressfulness_period", "appraisal_stressfulness_event", "appraisal_threat", "appraisal_challenge"]
#TODO Check valid questionnaire and feature names.
# the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names)) features_to_compute = list(set(requested_features) & set(base_features_names))
esm_features = pd.DataFrame(columns=["local_segment"] + features_to_compute) speech_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
if not esm_data.empty:
esm_data = filter_data_by_segment(esm_data, time_segment)
if not esm_data.empty: if not speech_data.empty:
esm_features = pd.DataFrame() speech_data = filter_data_by_segment(speech_data, time_segment)
for scale in requested_scales:
questionnaire_id = QUESTIONNAIRE_IDS[scale]
mask = esm_data["questionnaire_id"] == questionnaire_id
esm_features[scale + "_mean"] = esm_data.loc[mask].groupby(["local_segment"])["esm_user_score"].mean()
#TODO Create the column esm_user_score in esm_clean. Currently, this is only done when reversing.
esm_features = esm_features.reset_index() if not speech_data.empty:
if 'index' in esm_features: # In calse of empty esm_features df speech_features = pd.DataFrame()
esm_features.rename(columns={'index': 'local_segment'}, inplace=True) if "meanspeech" in features_to_compute:
speech_features["meanspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].mean()
if "stdspeech" in features_to_compute:
speech_features["stdspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].std()
if "nlargest" in features_to_compute:
speech_features["nlargest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nlargest(5).mean())
if "nsmallest" in features_to_compute:
speech_features["nsmallest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nsmallest(5).mean())
if "medianspeech" in features_to_compute:
speech_features["medianspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].median()
return esm_features speech_features = speech_features.reset_index()
return speech_features