Update filter_data_by_segment() function: call chunk_episodes() inside the filter function
parent
ca8c815446
commit
d3241c79f1
|
@ -3,8 +3,6 @@ import numpy as np
|
||||||
|
|
||||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||||
|
|
||||||
chunk_episodes = kwargs["chunk_episodes"]
|
|
||||||
|
|
||||||
ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
|
ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
|
||||||
activity_classes = provider["ACTIVITY_CLASSES"]
|
activity_classes = provider["ACTIVITY_CLASSES"]
|
||||||
|
|
||||||
|
@ -18,10 +16,6 @@ def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_seg
|
||||||
if not ar_episodes.empty:
|
if not ar_episodes.empty:
|
||||||
ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
|
ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
|
||||||
|
|
||||||
if not ar_episodes.empty:
|
|
||||||
# chunk episodes
|
|
||||||
ar_episodes = chunk_episodes(ar_episodes)
|
|
||||||
|
|
||||||
if not ar_episodes.empty:
|
if not ar_episodes.empty:
|
||||||
ar_features = pd.DataFrame()
|
ar_features = pd.DataFrame()
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@ from datetime import datetime, timedelta, time
|
||||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||||
|
|
||||||
battery_data = pd.read_csv(sensor_data_files["sensor_episodes"])
|
battery_data = pd.read_csv(sensor_data_files["sensor_episodes"])
|
||||||
chunk_episodes = kwargs["chunk_episodes"]
|
|
||||||
|
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
base_features_names = ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
|
base_features_names = ["countdischarge", "sumdurationdischarge", "countcharge", "sumdurationcharge", "avgconsumptionrate", "maxconsumptionrate"]
|
||||||
|
@ -16,10 +15,6 @@ def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_seg
|
||||||
if not battery_data.empty:
|
if not battery_data.empty:
|
||||||
battery_data = filter_data_by_segment(battery_data, day_segment)
|
battery_data = filter_data_by_segment(battery_data, day_segment)
|
||||||
|
|
||||||
if not battery_data.empty:
|
|
||||||
# chunk_episodes
|
|
||||||
battery_data = chunk_episodes(battery_data)
|
|
||||||
|
|
||||||
if not battery_data.empty:
|
if not battery_data.empty:
|
||||||
|
|
||||||
battery_data["episode_id"] = ((battery_data.battery_status != battery_data.battery_status.shift()) | (battery_data.start_timestamp - battery_data.end_timestamp.shift() > 1)).cumsum()
|
battery_data["episode_id"] = ((battery_data.battery_status != battery_data.battery_status.shift()) | (battery_data.start_timestamp - battery_data.end_timestamp.shift() > 1)).cumsum()
|
||||||
|
|
|
@ -34,7 +34,6 @@ def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_seg
|
||||||
requested_episode_types = provider["EPISODE_TYPES"]
|
requested_episode_types = provider["EPISODE_TYPES"]
|
||||||
ignore_episodes_shorter_than = provider["IGNORE_EPISODES_SHORTER_THAN"]
|
ignore_episodes_shorter_than = provider["IGNORE_EPISODES_SHORTER_THAN"]
|
||||||
ignore_episodes_longer_than = provider["IGNORE_EPISODES_LONGER_THAN"]
|
ignore_episodes_longer_than = provider["IGNORE_EPISODES_LONGER_THAN"]
|
||||||
chunk_episodes = kwargs["chunk_episodes"]
|
|
||||||
|
|
||||||
# name of the features this function can compute
|
# name of the features this function can compute
|
||||||
base_features_episodes = ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
|
base_features_episodes = ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
|
||||||
|
@ -50,10 +49,8 @@ def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_seg
|
||||||
if not screen_data.empty:
|
if not screen_data.empty:
|
||||||
|
|
||||||
screen_data = filter_data_by_segment(screen_data, day_segment)
|
screen_data = filter_data_by_segment(screen_data, day_segment)
|
||||||
if not screen_data.empty:
|
|
||||||
# chunk_episodes
|
|
||||||
screen_data = chunk_episodes(screen_data)
|
|
||||||
|
|
||||||
|
if not screen_data.empty:
|
||||||
if ignore_episodes_shorter_than > 0:
|
if ignore_episodes_shorter_than > 0:
|
||||||
screen_data = screen_data.query('@ignore_episodes_shorter_than <= duration')
|
screen_data = screen_data.query('@ignore_episodes_shorter_than <= duration')
|
||||||
if ignore_episodes_longer_than > 0:
|
if ignore_episodes_longer_than > 0:
|
||||||
|
|
|
@ -11,7 +11,12 @@ def filter_data_by_segment(data, day_segment):
|
||||||
data["timestamps_segment"] = None
|
data["timestamps_segment"] = None
|
||||||
else:
|
else:
|
||||||
data[["local_segment","timestamps_segment"]] = data["local_segment"].str.split(pat =";",n=1, expand=True)
|
data[["local_segment","timestamps_segment"]] = data["local_segment"].str.split(pat =";",n=1, expand=True)
|
||||||
return(data)
|
|
||||||
|
# chunk episodes
|
||||||
|
if (not data.empty) and ("start_timestamp" in data.columns) and ("end_timestamp" in data.columns):
|
||||||
|
data = chunk_episodes(data)
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
# Each minute could fall into two segments.
|
# Each minute could fall into two segments.
|
||||||
# Firstly, we generate two rows for each resampled minute via resample_episodes rule:
|
# Firstly, we generate two rows for each resampled minute via resample_episodes rule:
|
||||||
|
|
Loading…
Reference in New Issue