From 6ab0ac5329d0be06bbe8357203422c7d981c325a Mon Sep 17 00:00:00 2001
From: Primoz
Date: Wed, 26 Oct 2022 09:57:26 +0000
Subject: [PATCH] Optimize memory consumption with dtype definition while
 reading csv file.

Pass an explicit dtype mapping to pd.read_csv in the cr feature providers
for the Empatica accelerometer, electrodermal activity, inter-beat interval
and temperature sensors.

---
Review notes (ignored by git am):
- The inter-beat-interval hunk previously assigned the typed read to
  temperature_intraday_data and kept the untyped ibi_intraday_data read,
  so the file was parsed twice and the frame actually used had no dtypes.
  The hunk now replaces the ibi_intraday_data read instead.
- The post-image blob hash on that file's index line was not recomputed.

 src/features/empatica_accelerometer/cr/main.py          | 6 +++++-
 src/features/empatica_electrodermal_activity/cr/main.py | 6 +++++-
 src/features/empatica_inter_beat_interval/cr/main.py    | 6 +++++-
 src/features/empatica_temperature/cr/main.py            | 5 ++++-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/features/empatica_accelerometer/cr/main.py b/src/features/empatica_accelerometer/cr/main.py
index 77d18bfe..332114ac 100644
--- a/src/features/empatica_accelerometer/cr/main.py
+++ b/src/features/empatica_accelerometer/cr/main.py
@@ -43,7 +43,11 @@ def extract_acc_features_from_intraday_data(acc_intraday_data, features, window_
 
 
 def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-    acc_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
+
+    data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'double_values_0': int,
+                  'double_values_1': int, 'double_values_2': int, 'local_date_time': 'str', 'local_date': "str",
+                  'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
+    acc_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
 
     requested_intraday_features = provider["FEATURES"]
 
diff --git a/src/features/empatica_electrodermal_activity/cr/main.py b/src/features/empatica_electrodermal_activity/cr/main.py
index 0b09f02b..8a3ac540 100644
--- a/src/features/empatica_electrodermal_activity/cr/main.py
+++ b/src/features/empatica_electrodermal_activity/cr/main.py
@@ -44,7 +44,11 @@ def extract_eda_features_from_intraday_data(eda_intraday_data, features, window_
 
 
 def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-    eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
+
+    data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'electrodermal_activity': int, 'local_date_time': 'str',
+                  'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
+
+    eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
 
     requested_intraday_features = provider["FEATURES"]
 
diff --git a/src/features/empatica_inter_beat_interval/cr/main.py b/src/features/empatica_inter_beat_interval/cr/main.py
index 803bf3a8..6413f1c0 100644
--- a/src/features/empatica_inter_beat_interval/cr/main.py
+++ b/src/features/empatica_inter_beat_interval/cr/main.py
@@ -50,6 +50,10 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_
 
 
 def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
+
+    data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'inter_beat_interval': int, 'timings': 'int64', 'local_date_time': 'str',
+                  'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
+
+    ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
 
     requested_intraday_features = provider["FEATURES"]
diff --git a/src/features/empatica_temperature/cr/main.py b/src/features/empatica_temperature/cr/main.py
index 36e720bd..8dea752f 100644
--- a/src/features/empatica_temperature/cr/main.py
+++ b/src/features/empatica_temperature/cr/main.py
@@ -37,7 +37,10 @@ def extract_temp_features_from_intraday_data(temperature_intraday_data, features
 
 
 def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
-    temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
+    data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'temperature': int, 'local_date_time': 'str',
+                  'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
+
+    temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
 
     requested_intraday_features = provider["FEATURES"]
 