Raise an exception for invalid frequency segmentation info; fix the inconsistency in frequency segmentation output columns
parent
5f672b1f39
commit
4b6ba12e62
|
@ -2,28 +2,45 @@ import pandas as pd
|
|||
|
||||
def is_valid_frequency_segments(day_segments) -> bool:
    """
    Validate the table used to generate frequency segments.

    The table is expected to hold exactly one row with a ``label`` column and
    an integer ``length`` column (segment length in minutes), e.g.:

        label   length
        epoch   10

    Returns True if ``day_segments`` has the expected structure;
    raises ValueError (with a message naming the problem) otherwise.
    """
    # Every message ends with the same pointer to the configuration file.
    hint = 'Check the file under DAY_SEGMENTS in config.yaml'

    if day_segments is None:
        raise ValueError('Table of frequency segmentation info is None. ' + hint)

    # Exactly one specification row is allowed.
    n_rows = day_segments.shape[0]
    if n_rows == 0:
        raise ValueError('Table of frequency segmentation info is empty. ' + hint)
    if n_rows > 1:
        raise ValueError(
            'Table of frequency segmentation info provides multiple specification '
            'but only one is allowed. ' + hint
        )

    # Both columns must be present before their contents can be inspected.
    if 'length' not in day_segments.columns:
        raise ValueError('Table of frequency segmentation info must provide segment length. ' + hint)
    if 'label' not in day_segments.columns:
        raise ValueError('Table of frequency segmentation info must provide segment label. ' + hint)

    length_dtype = day_segments.dtypes['length']
    if not pd.api.types.is_integer_dtype(length_dtype):
        raise ValueError(
            'Only integer segment length is allowed in the table of frequency segmentation; '
            'found {}. '.format(length_dtype) + hint
        )

    length = day_segments.iloc[0].loc['length']
    # Zero is rejected as well: a zero-minute frequency cannot produce time
    # slots, and the message below already promises "positive" lengths only.
    if length <= 0:
        raise ValueError(
            'Only positive integer segment length is allowed in the table of frequency segmentation; '
            'found {}. '.format(length) + hint
        )
    # 1440 minutes == 24 hours; a segment must be strictly shorter than a day.
    if length >= 1440:
        raise ValueError(
            'Segment length in the table of frequency segmentation should be shorter than a day (in minutes); '
            'found {}. '.format(length) + hint
        )

    return True
|
||||
|
||||
|
@ -36,21 +53,24 @@ def is_valid_event_segments(day_segments):
|
|||
|
||||
def parse_frequency_segments(day_segments: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
returns a table with row identifying start and end of time slots with frequency freq (in minutes). For example,
|
||||
for freq = 10 it outputs
|
||||
returns a table with rows identifying start and end of time slots with frequency freq (in minutes). For example,
|
||||
for freq = 10 it outputs:
|
||||
bin_id start end label
|
||||
0 00:00 00:10 epoch_0000
|
||||
1 00:10 00:20 epoch_0001
|
||||
2 00:20 00:30 epoch_0002
|
||||
...
|
||||
143 23:50 00:00 epoch_0143
|
||||
day_segments argument is expected to have the following structure:
|
||||
label length
|
||||
epoch 10
|
||||
"""
|
||||
freq = day_segments.iloc[0].loc['length']
|
||||
slots = pd.date_range(start='2020-01-01', end='2020-01-02', freq='{}min'.format(freq))
|
||||
slots = ['{:02d}:{:02d}'.format(x.hour, x.minute) for x in slots]
|
||||
|
||||
table = pd.DataFrame(slots, columns=['start'])
|
||||
table['end'] = table['start'].shift(-1)
|
||||
table = pd.DataFrame(slots, columns=['start_time'])
|
||||
table['end_time'] = table['start_time'].shift(-1)
|
||||
table = table.iloc[:-1, :]
|
||||
|
||||
label = day_segments.loc[0, 'label']
|
||||
|
@ -59,7 +79,7 @@ def parse_frequency_segments(day_segments: pd.DataFrame) -> pd.DataFrame:
|
|||
|
||||
table['local_date'] = None
|
||||
|
||||
return table[['local_date', 'start', 'end', 'label']]
|
||||
return table[['local_date', 'start_time', 'end_time', 'label']]
|
||||
|
||||
def parse_interval_segments(day_segments):
|
||||
day_segments["local_date"] = 1
|
||||
|
|
Loading…
Reference in New Issue