From 9b21196f35317872d2fa8a87efa826bdd1e5be1d Mon Sep 17 00:00:00 2001
From: Nikunj Goel
Date: Wed, 26 May 2021 11:44:48 -0400
Subject: [PATCH] Fixed `expected_minutes` to account for different time segments. (#136)

---
 src/features/phone_conversation/rapids/main.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/features/phone_conversation/rapids/main.py b/src/features/phone_conversation/rapids/main.py
index 902cad69..c842a1cf 100644
--- a/src/features/phone_conversation/rapids/main.py
+++ b/src/features/phone_conversation/rapids/main.py
@@ -7,9 +7,7 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
 
     requested_features = provider["FEATURES"]
     recordingMinutes = provider["RECORDING_MINUTES"]
-    pausedMinutes = provider["PAUSED_MINUTES"]
-    expectedMinutes = 1440 / (recordingMinutes + pausedMinutes)
-
+    pausedMinutes = provider["PAUSED_MINUTES"]
     # name of the features this function can compute
     base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
     "sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy",
@@ -29,6 +27,9 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
         conversation_features = pd.DataFrame()
 
         conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
+        conversation_data[['start_ts','end_ts']] = conversation_data['timestamps_segment'].str.split(',',expand=True)
+        expectedMinutesDf = conversation_data[['local_segment','start_ts','end_ts']].drop_duplicates(subset=['local_segment']).set_index(['local_segment'])
+        expectedMinutes = (expectedMinutesDf['end_ts'].astype(int) - expectedMinutesDf['start_ts'].astype(int)) / ((60000) *(recordingMinutes + pausedMinutes))
 
         if "minutessilence" in features_to_compute:
             conversation_features["minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60