From 98ebf9bd13be31bd4a298d9a300f837f4b249f9b Mon Sep 17 00:00:00 2001
From: JulioV
Date: Tue, 1 Sep 2020 17:07:07 -0400
Subject: [PATCH] Migrate conversation to new file structure

---
 Snakefile                                 |  18 +--
 config.yaml                               |  21 +--
 rules/features.smk                        |  27 ++--
 .../conversation/conversation_base.py     | 117 ----------------
 .../conversation/conversation_entry.R     |  13 ++
 .../conversation/conversation_entry.py    |  18 +++
 src/features/conversation/rapids/main.py  | 128 ++++++++++++++++++
 7 files changed, 200 insertions(+), 142 deletions(-)
 delete mode 100644 src/features/conversation/conversation_base.py
 create mode 100644 src/features/conversation/conversation_entry.R
 create mode 100644 src/features/conversation/conversation_entry.py
 create mode 100644 src/features/conversation/rapids/main.py

diff --git a/Snakefile b/Snakefile
index 4719df9c..4154a72d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -135,15 +135,17 @@ if config["SLEEP"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
     files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
 
-if config["CONVERSATION"]["COMPUTE"]:
-    pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
-    pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
+pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
+pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
 
-    for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
-        files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-    files_to_compute.extend(expand("data/processed/{pid}/conversation_{day_segment}.csv",pid=config["PIDS"], day_segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
+for provider in config["CONVERSATION"]["PROVIDERS"].keys():
+    if config["CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
+        for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
+            files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
+        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CONVERSATION".lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CONVERSATION".lower()))
 
 for provider in config["LOCATIONS"]["PROVIDERS"].keys():
     if config["LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
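The Snakefile change above replaces the single CONVERSATION COMPUTE flag with a loop over per-provider COMPUTE flags, registering one interim file per provider and language plus one merged processed file per participant. As an illustration of what the new target pattern resolves to, here is a minimal sketch assuming a single hypothetical participant "p01" and only the RAPIDS provider enabled (the pid value is made up for this example; expand() is Snakemake's own helper, importable from snakemake.io outside a Snakefile):

# Illustrative only: how the new interim target pattern expands.
from snakemake.io import expand

targets = expand(
    "data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv",
    pid=["p01"], language=["python"], provider_key=["RAPIDS"], sensor_key=["conversation"])
print(targets)
# ['data/interim/p01/conversation_features/conversation_python_RAPIDS.csv']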
diff --git a/config.yaml b/config.yaml
index 0ef95d12..851e2f31 100644
--- a/config.yaml
+++ b/config.yaml
@@ -214,18 +214,21 @@ WIFI:
   SRC_LANGUAGE: "r"
 
 CONVERSATION:
-  COMPUTE: False
   DB_TABLE:
     ANDROID: plugin_studentlife_audio_android
     IOS: plugin_studentlife_audio
-  DAY_SEGMENTS: *day_segments
-  FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-    "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-    "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
-    "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
-    "unknownexpectedfraction","countconversation"]
-  RECORDINGMINUTES: 1
-  PAUSEDMINUTES : 3
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: True
+      FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
+        "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
+        "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+        "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
+        "unknownexpectedfraction","countconversation"]
+      RECORDING_MINUTES: 1
+      PAUSED_MINUTES: 3
+      SRC_FOLDER: "rapids" # inside src/features/conversation
+      SRC_LANGUAGE: "python"
 
 ### Visualizations ################################################################
 HEATMAP_FEATURES_CORRELATIONS:
diff --git a/rules/features.smk b/rules/features.smk
index 56c96972..1b945469 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -197,18 +197,29 @@ rule light_python_features:
     script:
         "../src/features/light/light_entry.py"
 
-rule conversation_features:
+rule conversation_r_features:
     input:
-        optional_conversation_input
+        sensor_data = optional_conversation_input,
+        day_segments_labels = "data/interim/day_segments_labels.csv"
     params:
-        day_segment = "{day_segment}",
-        features = config["CONVERSATION"]["FEATURES"],
-        recordingMinutes = config["CONVERSATION"]["RECORDINGMINUTES"],
-        pausedMinutes = config["CONVERSATION"]["PAUSEDMINUTES"],
+        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}"
     output:
-        "data/processed/{pid}/conversation_{day_segment}.csv"
+        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
     script:
-        "../src/features/conversation_features.py"
+        "../src/features/conversation/conversation_entry.R"
+
+rule conversation_python_features:
+    input:
+        sensor_data = optional_conversation_input,
+        day_segments_labels = "data/interim/day_segments_labels.csv"
+    params:
+        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}"
+    output:
+        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
+    script:
+        "../src/features/conversation/conversation_entry.py"
 
 rule accelerometer_features:
     input:
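In both new rules, params.provider is a lambda over wildcards, so each rule instance receives exactly the provider block whose key appears in its output path. Stripped of Snakemake, the lookup is plain nested dict access — a toy sketch with a hand-built config dict (values abbreviated, not the full config.yaml; the wildcard value is hard-coded for illustration):

# Illustrative only: what the params lambda resolves to for provider_key="RAPIDS".
config = {"CONVERSATION": {"PROVIDERS": {"RAPIDS": {
    "COMPUTE": True, "RECORDING_MINUTES": 1, "PAUSED_MINUTES": 3,
    "SRC_FOLDER": "rapids", "SRC_LANGUAGE": "python"}}}}

provider_key = "RAPIDS"  # Snakemake fills this from the {provider_key} wildcard in the output path
provider = config["CONVERSATION"]["PROVIDERS"][provider_key]
assert provider["SRC_LANGUAGE"] == "python"  # this provider is built by conversation_entry.py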
diff --git a/src/features/conversation/conversation_base.py b/src/features/conversation/conversation_base.py
deleted file mode 100644
index eb00c64b..00000000
--- a/src/features/conversation/conversation_base.py
+++ /dev/null
@@ -1,117 +0,0 @@
-import pandas as pd
-
-def base_conversation_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
-    # name of the features this function can compute
-    base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-    "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-    "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
-    "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
-    "unknownexpectedfraction","countconversation"]
-
-    # the subset of requested features this function can compute
-    features_to_compute = list(set(requested_features) & set(base_features_names))
-
-    conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
-    if not conversation_data.empty:
-        if day_segment != "daily":
-            conversation_data = conversation_data[conversation_data["local_day_segment"] == day_segment]
-
-        if not conversation_data.empty:
-            conversation_features = pd.DataFrame()
-
-            conversation_data = conversation_data.drop_duplicates(subset="local_time", keep="first")
-
-            if "minutessilence" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesnoise" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesvoice" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesunknown" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60
-
-            if "countconversation" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['double_convo_start'].nunique()
-
-            conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
-            conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
-
-            conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_date"])['inference'].count()/60
-
-            if "silencesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "noisesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "voicesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "unknownsensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "silenceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "noiseexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "voiceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "unknownexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "sumconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sumconversationduration"] = conversation_data.groupby(["local_date"])["conv_duration"].sum()
-
-            if "avgconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].mean()
-
-            if "sdconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].std()
-
-            if "minconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].min()
-
-            if "maxconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_maxconversationduration"] = conversation_data.groupby(["local_date"])["conv_duration"].max()
-
-            if "timefirstconversation" in features_to_compute:
-                timeFirstConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['local_time'].min()
-                if len(list(timeFirstConversation.index)) > 0:
-                    for date in list(timeFirstConversation.index):
-                        conversation_features.loc[date,"conversation_" + day_segment + "_timefirstconversation"] = int(timeFirstConversation.loc[date].split(':')[0])*60 + int(timeFirstConversation.loc[date].split(':')[1])
-                else:
-                    conversation_features["conversation_" + day_segment + "_timefirstconversation"] = 0
-
-            if "timelastconversation" in features_to_compute:
-                timeLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['local_time'].max()
-                if len(list(timeLastConversation.index)) > 0:
-                    for date in list(timeLastConversation.index):
-                        conversation_features.loc[date,"conversation_" + day_segment + "_timelastconversation"] = int(timeLastConversation.loc[date].split(':')[0])*60 + int(timeLastConversation.loc[date].split(':')[1])
-                else:
-                    conversation_features["conversation_" + day_segment + "_timelastconversation"] = 0
-
-            if "sumenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sumenergy"] = conversation_data.groupby(["local_date"])["double_energy"].sum()
-
-            if "avgenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_avgenergy"] = conversation_data.groupby(["local_date"])["double_energy"].mean()
-
-            if "sdenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sdenergy"] = conversation_data.groupby(["local_date"])["double_energy"].std()
-
-            if "minenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minenergy"] = conversation_data.groupby(["local_date"])["double_energy"].min()
-
-            if "maxenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_maxenergy"] = conversation_data.groupby(["local_date"])["double_energy"].max()
-
-
-    conversation_features = conversation_features.reset_index()
-
-    return conversation_features
\ No newline at end of file
diff --git a/src/features/conversation/conversation_entry.R b/src/features/conversation/conversation_entry.R
new file mode 100644
index 00000000..bf5d4fe9
--- /dev/null
+++ b/src/features/conversation/conversation_entry.R
@@ -0,0 +1,13 @@
+source("renv/activate.R")
+source("src/features/utils/utils.R")
+library("dplyr")
+library("tidyr")
+
+sensor_data_file <- snakemake@input[["sensor_data"]]
+day_segments_file <- snakemake@input[["day_segments_labels"]]
+provider <- snakemake@params["provider"][["provider"]]
+provider_key <- snakemake@params["provider_key"]
+
+sensor_features <- fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
+
+write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
diff --git a/src/features/conversation/conversation_entry.py b/src/features/conversation/conversation_entry.py
new file mode 100644
index 00000000..41eee92d
--- /dev/null
+++ b/src/features/conversation/conversation_entry.py
@@ -0,0 +1,18 @@
+import pandas as pd
+from importlib import import_module, util
+from pathlib import Path
+
+# import fetch_provider_features from src/features/utils/utils.py
+spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
+mod = util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+fetch_provider_features = getattr(mod, "fetch_provider_features")
+
+sensor_data_file = snakemake.input["sensor_data"][0]
+day_segments_file = snakemake.input["day_segments_labels"]
+provider = snakemake.params["provider"]
+provider_key = snakemake.params["provider_key"]
+
+sensor_features = fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
+
+sensor_features.to_csv(snakemake.output[0], index=False)
\ No newline at end of file
diff --git a/src/features/conversation/rapids/main.py b/src/features/conversation/rapids/main.py
new file mode 100644
index 00000000..4b8dc390
--- /dev/null
+++ b/src/features/conversation/rapids/main.py
@@ -0,0 +1,128 @@
+import pandas as pd
+import numpy as np
+
+# provider-based replacement for base_conversation_features(conversation_data, day_segment, requested_features, recordingMinutes, pausedMinutes, expectedMinutes)
+def rapids_features(conversation_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+
+    requested_features = provider["FEATURES"]
+    recordingMinutes = provider["RECORDING_MINUTES"]
+    pausedMinutes = provider["PAUSED_MINUTES"]
+    expectedMinutes = 1440 / (recordingMinutes + pausedMinutes)
+
+    # name of the features this function can compute
+    base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
+    "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
+    "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+    "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
+    "unknownexpectedfraction","countconversation"]
+
+    # the subset of requested features this function can compute
+    features_to_compute = list(set(requested_features) & set(base_features_names))
+
+    conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids" + "_" + x for x in features_to_compute])
+    if not conversation_data.empty:
+        conversation_data = filter_data_by_segment(conversation_data, day_segment)
+
+        if not conversation_data.empty:
+            conversation_features = pd.DataFrame()
+
+            conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
+
+            if "minutessilence" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesnoise" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesvoice" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesunknown" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
+
+            if "countconversation" in features_to_compute:
+                conversation_features["conversation_rapids" + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
+
+            conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
+            conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
+
+            conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_segment"])['inference'].count()/60
+
+            if "silencesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "noisesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "voicesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "unknownsensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "silenceexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "noiseexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "voiceexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "unknownexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "sumconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
+
+            if "avgconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
+
+            if "sdconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
+
+            if "minconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
+
+            if "maxconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
+
+            if "timefirstconversation" in features_to_compute:
+                timestampsFirstConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].min()
+                if len(list(timestampsFirstConversation.index)) > 0:
+                    for date in list(timestampsFirstConversation.index):
+                        firsttimestamp = timestampsFirstConversation.loc[date]
+                        firsttime = (conversation_data.query('timestamp == @firsttimestamp', inplace = False))['local_time'].iat[0]
+                        conversation_features.loc[date,"conversation_rapids" + "_timefirstconversation"] = int(firsttime.split(':')[0])*60 + int(firsttime.split(':')[1])
+                else:
+                    conversation_features["conversation_rapids" + "_timefirstconversation"] = np.nan
+
+            if "timelastconversation" in features_to_compute:
+                timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].max()
+                if len(list(timestampsLastConversation.index)) > 0:
+                    for date in list(timestampsLastConversation.index):
+                        lasttimestamp = timestampsLastConversation.loc[date]
+                        lasttime = (conversation_data.query('timestamp == @lasttimestamp', inplace = False))['local_time'].iat[0]
+                        conversation_features.loc[date,"conversation_rapids" + "_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
+                else:
+                    conversation_features["conversation_rapids" + "_timelastconversation"] = np.nan
+
+            if "sumenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sumenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].sum()
+
+            if "avgenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_avgenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].mean()
+
+            if "sdenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sdenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].std()
+
+            if "minenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].min()
+
+            if "maxenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_maxenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].max()
+
+
+    conversation_features = conversation_features.reset_index()
+
+    return conversation_features
\ No newline at end of file