diff --git a/config.yaml b/config.yaml index ca9faf25..7738b1d3 100644 --- a/config.yaml +++ b/config.yaml @@ -65,5 +65,5 @@ BATTERY: SCREEN: DAY_SEGMENTS: *day_segments METRICS_EVENT: ["counton", "countunlock"] - METRICS_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"] + METRICS_DELTAS: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"] EPISODES: ["unlock"] \ No newline at end of file diff --git a/rules/features.snakefile b/rules/features.snakefile index b9dd835e..30a4073c 100644 --- a/rules/features.snakefile +++ b/rules/features.snakefile @@ -98,7 +98,7 @@ rule screen_metrics: params: day_segment = "{day_segment}", metrics_event = config["SCREEN"]["METRICS_EVENT"], - metrics_episode = config["SCREEN"]["METRICS_EPISODE"], + metrics_deltas = config["SCREEN"]["METRICS_DELTAS"], episodes = config["SCREEN"]["EPISODES"] output: "data/processed/{pid}/screen_{day_segment}.csv" diff --git a/src/features/features_utils.py b/src/features/features_utils.py index 5d631f21..b55424b2 100644 --- a/src/features/features_utils.py +++ b/src/features/features_utils.py @@ -40,8 +40,12 @@ def splitOvernightEpisodes(sensor_deltas, extra_cols, fixed_cols): # calculate new time_diff and extra_cols for split overnight periods overnight = computeTruncatedDifferences(overnight, extra_cols) + + # sort by local_start_date_time and reset the index + days = pd.concat([not_overnight, overnight], axis=0, sort=False) + days = days.sort_values(by=['local_start_date_time']).reset_index(drop=True) - return pd.concat([not_overnight, overnight], axis=0, sort=False) + return days def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols): # extract episodes that start and end at the same epochs @@ -74,4 +78,8 @@ def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols): if not across_segments.empty: accross_segments = computeTruncatedDifferences(across_segments, extra_cols) - return pd.concat([exact_segments, across_segments], axis=0, sort=False) \ No newline at end of file + # sort by local_start_date_time and reset the index + segments = pd.concat([exact_segments, across_segments], axis=0, sort=False) + segments = segments.sort_values(by=['local_start_date_time']).reset_index(drop=True) + + return segments \ No newline at end of file diff --git a/src/features/screen_metrics.py b/src/features/screen_metrics.py index b3786d13..2e04cbc9 100644 --- a/src/features/screen_metrics.py +++ b/src/features/screen_metrics.py @@ -54,12 +54,12 @@ screen_data = pd.read_csv(snakemake.input["screen_events"], parse_dates=["local_ screen_deltas = pd.read_csv(snakemake.input["screen_deltas"], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]) day_segment = snakemake.params["day_segment"] metrics_event = snakemake.params["metrics_event"] -metrics_episode = snakemake.params["metrics_episode"] +metrics_deltas = snakemake.params["metrics_deltas"] episodes = snakemake.params["episodes"] if screen_data.empty: - metrics_episode_name = ["".join(metric) for metric in itertools.product(metrics_episode,episodes)] - screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_episode_name]) + metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas,episodes)] + screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_deltas_name]) else: # drop consecutive duplicates of screen_status keeping the last one screen_data = screen_data.loc[(screen_data[["screen_status"]].shift(-1) != screen_data[["screen_status"]]).any(axis=1)].reset_index(drop=True) @@ -75,9 +75,9 @@ else: event_features = getEventFeatures(screen_data, metrics_event) duration_features = pd.DataFrame() for episode in episodes: - duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_episode)], axis=1) + duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas)], axis=1) screen_features = pd.concat([event_features, duration_features], axis = 1).fillna(0) - screen_features = screen_features.rename_axis('local_date').reset_index() + screen_features = screen_features.rename_axis("local_date").reset_index() screen_features.to_csv(snakemake.output[0], index=False) \ No newline at end of file