Fix filter_data_by_segment bug

pull/107/head
JulioV 2020-12-12 17:10:59 -05:00
parent 5bd1bfe856
commit 05627296f4
1 changed file with 5 additions and 1 deletion

View File

@@ -1,13 +1,17 @@
rapids_log_tag = "RAPIDS:" rapids_log_tag = "RAPIDS:"
def filter_data_by_segment(data, time_segment): def filter_data_by_segment(data, time_segment):
if(data.shape[0] == 0): # data is empty
data["local_segment"] = data["timestamps_segment"] = None
return data
datetime_regex = "[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}" datetime_regex = "[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}"
timestamps_regex = "[0-9]{13}" timestamps_regex = "[0-9]{13}"
segment_regex = "\[({}#{},{};{},{})\]".format(time_segment, datetime_regex, datetime_regex, timestamps_regex, timestamps_regex) segment_regex = "\[({}#{},{};{},{})\]".format(time_segment, datetime_regex, datetime_regex, timestamps_regex, timestamps_regex)
data["local_segment"] = data["assigned_segments"].str.extract(segment_regex, expand=True) data["local_segment"] = data["assigned_segments"].str.extract(segment_regex, expand=True)
data = data.drop(columns=["assigned_segments"]) data = data.drop(columns=["assigned_segments"])
data = data.dropna(subset = ["local_segment"]) data = data.dropna(subset = ["local_segment"])
if(data.shape[0] == 0): # there are no rows belonging to time_segment if(data.shape[0] == 0): # there are no rows belonging to time_segment after dropping na
data["timestamps_segment"] = None data["timestamps_segment"] = None
else: else:
data[["local_segment","timestamps_segment"]] = data["local_segment"].str.split(pat =";",n=1, expand=True) data[["local_segment","timestamps_segment"]] = data["local_segment"].str.split(pat =";",n=1, expand=True)