Reduce memory consumption by specifying column dtypes when reading the sensor CSV files.

notes
Primoz 2022-10-26 09:57:26 +00:00
parent b92a3aa37a
commit 6ab0ac5329
4 changed files with 19 additions and 3 deletions

View File

@ -43,7 +43,11 @@ def extract_acc_features_from_intraday_data(acc_intraday_data, features, window_
def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
acc_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'double_values_0': int,
'double_values_1': int, 'double_values_2': int, 'local_date_time': 'str', 'local_date': "str",
'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
acc_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]

View File

@ -44,7 +44,11 @@ def extract_eda_features_from_intraday_data(eda_intraday_data, features, window_
def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'electrodermal_activity': int, 'local_date_time': 'str',
'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
eda_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]

View File

@ -50,6 +50,11 @@ def extract_ibi_features_from_intraday_data(ibi_intraday_data, features, window_
def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'inter_beat_interval': int, 'timings': 'int64', 'local_date_time': 'str',
'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) ibi_intraday_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]

View File

@ -37,7 +37,10 @@ def extract_temp_features_from_intraday_data(temperature_intraday_data, features
def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def cr_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"]) data_types = {'local_timezone': 'str', 'device_id': 'str', 'timestamp': 'int64', 'temperature': int, 'local_date_time': 'str',
'local_date': "str", 'local_time': "str", 'local_hour': "str", 'local_minute': "str", 'assigned_segments': "str"}
temperature_intraday_data = pd.read_csv(sensor_data_files["sensor_data"], dtype=data_types)
requested_intraday_features = provider["FEATURES"] requested_intraday_features = provider["FEATURES"]