Migrate conversation to new file structure

parent 8d87f6e497
commit 98ebf9bd13
Snakefile

@@ -135,15 +135,17 @@ if config["SLEEP"]["COMPUTE"]:
     files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_{fitbit_data_type}_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["intraday", "summary"]))
     files_to_compute.extend(expand("data/processed/{pid}/fitbit_sleep_{day_segment}.csv", pid = config["PIDS"], day_segment = config["SLEEP"]["DAY_SEGMENTS"]))
 
-if config["CONVERSATION"]["COMPUTE"]:
 pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
 pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
 
+for provider in config["CONVERSATION"]["PROVIDERS"].keys():
+    if config["CONVERSATION"]["PROVIDERS"][provider]["COMPUTE"]:
         for pids,table in zip([pids_android, pids_ios], [config["CONVERSATION"]["DB_TABLE"]["ANDROID"], config["CONVERSATION"]["DB_TABLE"]["IOS"]]):
             files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
             files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
             files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
-    files_to_compute.extend(expand("data/processed/{pid}/conversation_{day_segment}.csv",pid=config["PIDS"], day_segment = config["CONVERSATION"]["DAY_SEGMENTS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["CONVERSATION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="CONVERSATION".lower()))
+        files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="CONVERSATION".lower()))
 
 for provider in config["LOCATIONS"]["PROVIDERS"].keys():
     if config["LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
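Snakemake's expand crosses a path template with every wildcard value, so each entry appended to files_to_compute above is really a list of concrete targets. A minimal sketch (the participant ids are invented):

    from snakemake.io import expand

    # Two participants crossed with one DB table yield two concrete targets;
    # "p01"/"p02" are hypothetical pids, the table name comes from config.yaml.
    targets = expand("data/raw/{pid}/{sensor}_raw.csv",
                     pid=["p01", "p02"],
                     sensor=["plugin_studentlife_audio_android"])
    # ['data/raw/p01/plugin_studentlife_audio_android_raw.csv',
    #  'data/raw/p02/plugin_studentlife_audio_android_raw.csv']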
config.yaml
@@ -214,18 +214,21 @@ WIFI:
       SRC_LANGUAGE: "r"
 
 CONVERSATION:
-  COMPUTE: False
   DB_TABLE:
     ANDROID: plugin_studentlife_audio_android
     IOS: plugin_studentlife_audio
-  DAY_SEGMENTS: *day_segments
+  PROVIDERS:
+    RAPIDS:
+      COMPUTE: True
       FEATURES: ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
         "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
         "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
         "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
         "unknownexpectedfraction","countconversation"]
-  RECORDINGMINUTES: 1
-  PAUSEDMINUTES : 3
+      RECORDING_MINUTES: 1
+      PAUSED_MINUTES : 3
+      SRC_FOLDER: "rapids" # inside src/features/conversation
+      SRC_LANGUAGE: "python"
 
 ### Visualizations ################################################################
 HEATMAP_FEATURES_CORRELATIONS:
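The renamed RECORDING_MINUTES and PAUSED_MINUTES keys describe the audio plugin's duty cycle, and the new provider script derives the expected number of sensed minutes per day from them. A quick check of the arithmetic with the defaults above:

    # 1 minute recording followed by 3 minutes paused means a quarter of
    # each day is sensed (mirrors expectedMinutes in the provider script).
    RECORDING_MINUTES = 1
    PAUSED_MINUTES = 3
    expected_minutes = 1440 / (RECORDING_MINUTES + PAUSED_MINUTES)
    print(expected_minutes)  # 360.0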
@@ -197,18 +197,29 @@ rule light_python_features:
     script:
         "../src/features/light/light_entry.py"
 
-rule conversation_features:
+rule conversation_r_features:
     input:
-        optional_conversation_input
+        sensor_data = optional_conversation_input,
+        day_segments_labels = "data/interim/day_segments_labels.csv"
     params:
-        day_segment = "{day_segment}",
-        features = config["CONVERSATION"]["FEATURES"],
-        recordingMinutes = config["CONVERSATION"]["RECORDINGMINUTES"],
-        pausedMinutes = config["CONVERSATION"]["PAUSEDMINUTES"],
+        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}"
     output:
-        "data/processed/{pid}/conversation_{day_segment}.csv"
+        "data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
     script:
-        "../src/features/conversation_features.py"
+        "../src/features/conversation/conversation_entry.R"
+
+rule conversation_python_features:
+    input:
+        sensor_data = optional_conversation_input,
+        day_segments_labels = "data/interim/day_segments_labels.csv"
+    params:
+        provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
+        provider_key = "{provider_key}"
+    output:
+        "data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
+    script:
+        "../src/features/conversation/conversation_entry.py"
 
 rule accelerometer_features:
     input:
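Both rules read their provider configuration through a params function: Snakemake matches the requested output path, fills the {provider_key} wildcard, and passes the wildcards object to the lambda. A minimal sketch of that lookup, with an invented target path and a stand-in Wildcards class:

    # Asking for data/interim/p01/conversation_features/conversation_python_RAPIDS.csv
    # resolves wildcards.provider_key to "RAPIDS", so the lambda returns the
    # RAPIDS block of config.yaml.
    config = {"CONVERSATION": {"PROVIDERS": {"RAPIDS": {"SRC_LANGUAGE": "python"}}}}

    class Wildcards:
        provider_key = "RAPIDS"

    provider = (lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key])(Wildcards())
    print(provider)  # {'SRC_LANGUAGE': 'python'}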
src/features/conversation_features.py (deleted)

@@ -1,117 +0,0 @@
-import pandas as pd
-
-
-def base_conversation_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
-    # name of the features this function can compute
-    base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
-                           "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
-                           "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
-                           "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
-                           "unknownexpectedfraction","countconversation"]
-
-    # the subset of requested features this function can compute
-    features_to_compute = list(set(requested_features) & set(base_features_names))
-
-    conversation_features = pd.DataFrame(columns=["local_date"] + ["conversation_" + day_segment + "_" + x for x in features_to_compute])
-    if not conversation_data.empty:
-        if day_segment != "daily":
-            conversation_data = conversation_data[conversation_data["local_day_segment"] == day_segment]
-
-        if not conversation_data.empty:
-            conversation_features = pd.DataFrame()
-
-            conversation_data = conversation_data.drop_duplicates(subset="local_time", keep="first")
-
-            if "minutessilence" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesnoise" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesvoice" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60
-
-            if "minutesunknown" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60
-
-            if "countconversation" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['double_convo_start'].nunique()
-
-            conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
-            conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
-
-            conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_date"])['inference'].count()/60
-
-            if "silencesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "noisesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "voicesensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "unknownsensedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60)/ conv_totalDuration
-
-            if "silenceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "noiseexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "voiceexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "unknownexpectedfraction" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_date"])['inference'].count()/60)/ expectedMinutes
-
-            if "sumconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sumconversationduration"] = conversation_data.groupby(["local_date"])["conv_duration"].sum()
-
-            if "avgconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].mean()
-
-            if "sdconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].std()
-
-            if "minconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_date"])["conv_duration"].min()
-
-            if "maxconversationduration" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_maxconversationduration"] = conversation_data.groupby(["local_date"])["conv_duration"].max()
-
-            if "timefirstconversation" in features_to_compute:
-                timeFirstConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['local_time'].min()
-                if len(list(timeFirstConversation.index)) > 0:
-                    for date in list(timeFirstConversation.index):
-                        conversation_features.loc[date,"conversation_" + day_segment + "_timefirstconversation"] = int(timeFirstConversation.loc[date].split(':')[0])*60 + int(timeFirstConversation.loc[date].split(':')[1])
-                else:
-                    conversation_features["conversation_" + day_segment + "_timefirstconversation"] = 0
-
-            if "timelastconversation" in features_to_compute:
-                timeLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_date"])['local_time'].max()
-                if len(list(timeLastConversation.index)) > 0:
-                    for date in list(timeLastConversation.index):
-                        conversation_features.loc[date,"conversation_" + day_segment + "_timelastconversation"] = int(timeLastConversation.loc[date].split(':')[0])*60 + int(timeLastConversation.loc[date].split(':')[1])
-                else:
-                    conversation_features["conversation_" + day_segment + "_timelastconversation"] = 0
-
-            if "sumenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sumenergy"] = conversation_data.groupby(["local_date"])["double_energy"].sum()
-
-            if "avgenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_avgenergy"] = conversation_data.groupby(["local_date"])["double_energy"].mean()
-
-            if "sdenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_sdenergy"] = conversation_data.groupby(["local_date"])["double_energy"].std()
-
-            if "minenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_minenergy"] = conversation_data.groupby(["local_date"])["double_energy"].min()
-
-            if "maxenergy" in features_to_compute:
-                conversation_features["conversation_" + day_segment + "_maxenergy"] = conversation_data.groupby(["local_date"])["double_energy"].max()
-
-            conversation_features = conversation_features.reset_index()
-
-    return conversation_features
src/features/conversation/conversation_entry.R (new file)

@@ -0,0 +1,13 @@
+source("renv/activate.R")
+source("src/features/utils/utils.R")
+library("dplyr")
+library("tidyr")
+
+sensor_data_file <- snakemake@input[["sensor_data"]]
+day_segments_file <- snakemake@input[["day_segments_labels"]]
+provider <- snakemake@params["provider"][["provider"]]
+provider_key <- snakemake@params["provider_key"]
+
+sensor_features <- fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
+
+write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
src/features/conversation/conversation_entry.py (new file)

@@ -0,0 +1,18 @@
+import pandas as pd
+from importlib import import_module, util
+from pathlib import Path
+
+# import fetch_provider_features from src/features/utils/utils.py
+spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
+mod = util.module_from_spec(spec)
+spec.loader.exec_module(mod)
+fetch_provider_features = getattr(mod, "fetch_provider_features")
+
+sensor_data_file = snakemake.input["sensor_data"][0]
+day_segments_file = snakemake.input["day_segments_labels"]
+provider = snakemake.params["provider"]
+provider_key = snakemake.params["provider_key"]
+
+sensor_features = fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
+
+sensor_features.to_csv(snakemake.output[0], index=False)
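The importlib boilerplate above exists because utils.py lives outside the entry script's directory, so it is loaded by file path instead of through sys.path. A stand-alone sketch of the same pattern (the path is illustrative):

    from importlib import util

    # Load a module from an explicit file path and pull one function from it.
    spec = util.spec_from_file_location("utils", "src/features/utils/utils.py")
    utils = util.module_from_spec(spec)
    spec.loader.exec_module(utils)
    fetch_provider_features = utils.fetch_provider_features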
New RAPIDS provider script (SRC_FOLDER "rapids" inside src/features/conversation)

@@ -0,0 +1,128 @@
+import pandas as pd
+import numpy as np
+
+# def rapids_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
+def rapids_features(conversation_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
+
+    requested_features = provider["FEATURES"]
+    recordingMinutes = provider["RECORDING_MINUTES"]
+    pausedMinutes = provider["PAUSED_MINUTES"]
+    expectedMinutes = 1440 / (recordingMinutes + pausedMinutes)
+
+    # name of the features this function can compute
+    base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
+                           "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","sumenergy",
+                           "avgenergy","sdenergy","minenergy","maxenergy","silencesensedfraction","noisesensedfraction",
+                           "voicesensedfraction","unknownsensedfraction","silenceexpectedfraction","noiseexpectedfraction","voiceexpectedfraction",
+                           "unknownexpectedfraction","countconversation"]
+
+    # the subset of requested features this function can compute
+    features_to_compute = list(set(requested_features) & set(base_features_names))
+
+    conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids" + "_" + x for x in features_to_compute])
+    if not conversation_data.empty:
+        conversation_data = filter_data_by_segment(conversation_data, day_segment)
+
+        if not conversation_data.empty:
+            conversation_features = pd.DataFrame()
+
+            conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
+
+            if "minutessilence" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesnoise" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesvoice" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
+
+            if "minutesunknown" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
+
+            if "countconversation" in features_to_compute:
+                conversation_features["conversation_rapids" + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
+
+            conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
+            conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
+
+            conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_segment"])['inference'].count()/60
+
+            if "silencesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "noisesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "voicesensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "unknownsensedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
+
+            if "silenceexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "noiseexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "voiceexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "unknownexpectedfraction" in features_to_compute:
+                conversation_features["conversation_rapids" + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
+
+            if "sumconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
+
+            if "avgconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
+
+            if "sdconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
+
+            if "minconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
+
+            if "maxconversationduration" in features_to_compute:
+                conversation_features["conversation_rapids" + "_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
+
+            if "timefirstconversation" in features_to_compute:
+                timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].min()
+                if len(list(timestampsLastConversation.index)) > 0:
+                    for date in list(timestampsLastConversation.index):
+                        lastimestamp = timestampsLastConversation.loc[date]
+                        lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
+                        conversation_features.loc[date,"conversation_rapids" + "_timefirstconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
+                else:
+                    conversation_features["conversation_rapids" + "_timefirstconversation"] = np.nan
+
+            if "timelastconversation" in features_to_compute:
+                timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].max()
+                if len(list(timestampsLastConversation.index)) > 0:
+                    for date in list(timestampsLastConversation.index):
+                        lastimestamp = timestampsLastConversation.loc[date]
+                        lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
+                        conversation_features.loc[date,"conversation_rapids" + "_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
+                else:
+                    conversation_features["conversation_rapids" + "_timelastconversation"] = np.nan
+
+            if "sumenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sumenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].sum()
+
+            if "avgenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_avgenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].mean()
+
+            if "sdenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_sdenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].std()
+
+            if "minenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_minenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].min()
+
+            if "maxenergy" in features_to_compute:
+                conversation_features["conversation_rapids" + "_maxenergy"] = conversation_data.groupby(["local_segment"])["double_energy"].max()
+
+            conversation_features = conversation_features.reset_index()
+
+    return conversation_features
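Most of the features above share one pattern: the count/60 divisions imply roughly one audio inference row per second, with labels 0-3 meaning silence, noise, voice, and unknown, so counting a label's rows per segment and dividing by 60 yields minutes. A toy check with invented data:

    import pandas as pd

    # 90 one-second rows of silence (0) and 30 of voice (2) in one segment.
    data = pd.DataFrame({
        "local_segment": ["2020-11-01"] * 120,
        "inference": [0] * 90 + [2] * 30,
    })
    minutes_silence = data[data["inference"] == 0].groupby("local_segment")["inference"].count() / 60
    print(minutes_silence)  # 2020-11-01    1.5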