Fixed `expected_minutes` to account for different time segments. (#136)

feature/aware_sql_split
Nikunj Goel 2021-05-26 11:44:48 -04:00 committed by GitHub
parent 772e114eb5
commit 9b21196f35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 4 additions and 3 deletions

View File

@@ -8,8 +8,6 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
requested_features = provider["FEATURES"]
recordingMinutes = provider["RECORDING_MINUTES"]
pausedMinutes = provider["PAUSED_MINUTES"]
expectedMinutes = 1440 / (recordingMinutes + pausedMinutes)
# name of the features this function can compute
base_features_names = ["minutessilence", "minutesnoise", "minutesvoice", "minutesunknown","sumconversationduration","avgconversationduration",
"sdconversationduration","minconversationduration","maxconversationduration","timefirstconversation","timelastconversation","noisesumenergy",
@@ -29,6 +27,9 @@ def rapids_features(sensor_data_files, time_segment, provider, filter_data_by_se
conversation_features = pd.DataFrame()
conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
conversation_data[['start_ts','end_ts']] = conversation_data['timestamps_segment'].str.split(',',expand=True)
expectedMinutesDf = conversation_data[['local_segment','start_ts','end_ts']].drop_duplicates(subset=['local_segment']).set_index(['local_segment'])
expectedMinutes = (expectedMinutesDf['end_ts'].astype(int) - expectedMinutesDf['start_ts'].astype(int)) / ((60000) *(recordingMinutes + pausedMinutes))
if "minutessilence" in features_to_compute:
    conversation_features["minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60