Fixed issues #17 and #18

replace/947a1f30ee9b9903e05894c8aeab7a7235e8f997
Meng Li 2019-12-02 11:22:50 -05:00
parent 80376f0c35
commit c7ca35d13e
4 changed files with 17 additions and 9 deletions


@@ -65,5 +65,5 @@ BATTERY:
 SCREEN:
     DAY_SEGMENTS: *day_segments
     METRICS_EVENT: ["counton", "countunlock"]
-    METRICS_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
+    METRICS_DELTAS: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
     EPISODES: ["unlock"]
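The renamed key has to stay in sync with the config lookups in the Snakefile and the feature script below. A minimal sketch of that lookup, assuming the file is read with PyYAML (in the real pipeline Snakemake loads config.yaml itself):

import yaml

# Load the pipeline config the way any consumer of this key would.
with open("config.yaml") as f:
    config = yaml.safe_load(f)

# After this commit the key is METRICS_DELTAS; the old
# config["SCREEN"]["METRICS_EPISODE"] lookup would raise a KeyError.
metrics_deltas = config["SCREEN"]["METRICS_DELTAS"]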


@@ -98,7 +98,7 @@ rule screen_metrics:
     params:
         day_segment = "{day_segment}",
         metrics_event = config["SCREEN"]["METRICS_EVENT"],
-        metrics_episode = config["SCREEN"]["METRICS_EPISODE"],
+        metrics_deltas = config["SCREEN"]["METRICS_DELTAS"],
         episodes = config["SCREEN"]["EPISODES"]
     output:
         "data/processed/{pid}/screen_{day_segment}.csv"


@@ -40,8 +40,12 @@ def splitOvernightEpisodes(sensor_deltas, extra_cols, fixed_cols):
     # calculate new time_diff and extra_cols for split overnight periods
     overnight = computeTruncatedDifferences(overnight, extra_cols)

-    return pd.concat([not_overnight, overnight], axis=0, sort=False)
+    # sort by local_start_date_time and reset the index
+    days = pd.concat([not_overnight, overnight], axis=0, sort=False)
+    days = days.sort_values(by=['local_start_date_time']).reset_index(drop=True)
+    return days

 def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols):
     # extract episodes that start and end at the same epochs
@@ -74,4 +78,8 @@ def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols):
     if not across_segments.empty:
         across_segments = computeTruncatedDifferences(across_segments, extra_cols)

-    return pd.concat([exact_segments, across_segments], axis=0, sort=False)
+    # sort by local_start_date_time and reset the index
+    segments = pd.concat([exact_segments, across_segments], axis=0, sort=False)
+    segments = segments.sort_values(by=['local_start_date_time']).reset_index(drop=True)
+    return segments
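Both hunks fix the same problem: pd.concat preserves the row order of its inputs, so split overnight (or cross-segment) episodes ended up appended after all other rows rather than in chronological order, and with a duplicated index. A minimal sketch of the behavior, with hypothetical timestamps:

import pandas as pd

not_overnight = pd.DataFrame({"local_start_date_time": pd.to_datetime(["2019-12-01 09:00", "2019-12-02 10:00"])})
overnight = pd.DataFrame({"local_start_date_time": pd.to_datetime(["2019-12-01 23:30"])})

# concat alone leaves the overnight row last, out of order, with index 0, 1, 0
days = pd.concat([not_overnight, overnight], axis=0, sort=False)

# the fix: restore chronological order and a clean 0..n-1 index
days = days.sort_values(by=["local_start_date_time"]).reset_index(drop=True)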


@@ -54,12 +54,12 @@ screen_data = pd.read_csv(snakemake.input["screen_events"], parse_dates=["local_
 screen_deltas = pd.read_csv(snakemake.input["screen_deltas"], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
 day_segment = snakemake.params["day_segment"]
 metrics_event = snakemake.params["metrics_event"]
-metrics_episode = snakemake.params["metrics_episode"]
+metrics_deltas = snakemake.params["metrics_deltas"]
 episodes = snakemake.params["episodes"]

 if screen_data.empty:
-    metrics_episode_name = ["".join(metric) for metric in itertools.product(metrics_episode,episodes)]
-    screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_episode_name])
+    metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas,episodes)]
+    screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_deltas_name])
 else:
     # drop consecutive duplicates of screen_status keeping the last one
     screen_data = screen_data.loc[(screen_data[["screen_status"]].shift(-1) != screen_data[["screen_status"]]).any(axis=1)].reset_index(drop=True)
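The else branch starts by collapsing runs of identical screen_status values, keeping the last row of each run: comparing the column against its shift(-1) flags a row only where the next row differs. A minimal sketch with hypothetical status codes:

import pandas as pd

screen_data = pd.DataFrame({"screen_status": [0, 0, 3, 3, 3, 0]})

# keep the last row of each run of consecutive duplicates
deduped = screen_data.loc[
    (screen_data[["screen_status"]].shift(-1) != screen_data[["screen_status"]]).any(axis=1)
].reset_index(drop=True)
# screen_status is now [0, 3, 0]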
@@ -75,9 +75,9 @@ else:
     event_features = getEventFeatures(screen_data, metrics_event)
     duration_features = pd.DataFrame()
     for episode in episodes:
-        duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_episode)], axis=1)
+        duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas)], axis=1)
     screen_features = pd.concat([event_features, duration_features], axis = 1).fillna(0)
-    screen_features = screen_features.rename_axis('local_date').reset_index()
+    screen_features = screen_features.rename_axis("local_date").reset_index()
     screen_features.to_csv(snakemake.output[0], index=False)
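When there is no screen data, the script still emits a frame with the full feature-column set; the episode-duration names are the cross product of the delta metrics and the episode types. A sketch of what the comprehension produces, using the values from the config hunk above (the day segment name is illustrative):

import itertools

metrics_event = ["counton", "countunlock"]
metrics_deltas = ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
episodes = ["unlock"]
day_segment = "daily"  # illustrative

metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas, episodes)]
# ['sumdurationunlock', 'maxdurationunlock', 'mindurationunlock',
#  'avgdurationunlock', 'stddurationunlock']

columns = ["local_date"] + ["screen_" + day_segment + "_" + x for x in metrics_event + metrics_deltas_name]
# ['local_date', 'screen_daily_counton', 'screen_daily_countunlock',
#  'screen_daily_sumdurationunlock', ...]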