Fixed issues #17 and #18

replace/947a1f30ee9b9903e05894c8aeab7a7235e8f997
Meng Li 2019-12-02 11:22:50 -05:00
parent 80376f0c35
commit c7ca35d13e
4 changed files with 17 additions and 9 deletions

View File

@@ -65,5 +65,5 @@ BATTERY:
SCREEN: SCREEN:
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
METRICS_EVENT: ["counton", "countunlock"] METRICS_EVENT: ["counton", "countunlock"]
METRICS_EPISODE: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"] METRICS_DELTAS: ["sumduration", "maxduration", "minduration", "avgduration", "stdduration"]
EPISODES: ["unlock"] EPISODES: ["unlock"]

View File

@@ -98,7 +98,7 @@ rule screen_metrics:
params: params:
day_segment = "{day_segment}", day_segment = "{day_segment}",
metrics_event = config["SCREEN"]["METRICS_EVENT"], metrics_event = config["SCREEN"]["METRICS_EVENT"],
metrics_episode = config["SCREEN"]["METRICS_EPISODE"], metrics_deltas = config["SCREEN"]["METRICS_DELTAS"],
episodes = config["SCREEN"]["EPISODES"] episodes = config["SCREEN"]["EPISODES"]
output: output:
"data/processed/{pid}/screen_{day_segment}.csv" "data/processed/{pid}/screen_{day_segment}.csv"

View File

@@ -41,7 +41,11 @@ def splitOvernightEpisodes(sensor_deltas, extra_cols, fixed_cols):
# calculate new time_diff and extra_cols for split overnight periods # calculate new time_diff and extra_cols for split overnight periods
overnight = computeTruncatedDifferences(overnight, extra_cols) overnight = computeTruncatedDifferences(overnight, extra_cols)
return pd.concat([not_overnight, overnight], axis=0, sort=False) # sort by local_start_date_time and reset the index
days = pd.concat([not_overnight, overnight], axis=0, sort=False)
days = days.sort_values(by=['local_start_date_time']).reset_index(drop=True)
return days
def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols): def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols):
# extract episodes that start and end at the same epochs # extract episodes that start and end at the same epochs
@@ -74,4 +78,8 @@ def splitMultiSegmentEpisodes(sensor_deltas, day_segment, extra_cols):
if not across_segments.empty: if not across_segments.empty:
accross_segments = computeTruncatedDifferences(across_segments, extra_cols) accross_segments = computeTruncatedDifferences(across_segments, extra_cols)
return pd.concat([exact_segments, across_segments], axis=0, sort=False) # sort by local_start_date_time and reset the index
segments = pd.concat([exact_segments, across_segments], axis=0, sort=False)
segments = segments.sort_values(by=['local_start_date_time']).reset_index(drop=True)
return segments

View File

@@ -54,12 +54,12 @@ screen_data = pd.read_csv(snakemake.input["screen_events"], parse_dates=["local_
screen_deltas = pd.read_csv(snakemake.input["screen_deltas"], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"]) screen_deltas = pd.read_csv(snakemake.input["screen_deltas"], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
day_segment = snakemake.params["day_segment"] day_segment = snakemake.params["day_segment"]
metrics_event = snakemake.params["metrics_event"] metrics_event = snakemake.params["metrics_event"]
metrics_episode = snakemake.params["metrics_episode"] metrics_deltas = snakemake.params["metrics_deltas"]
episodes = snakemake.params["episodes"] episodes = snakemake.params["episodes"]
if screen_data.empty: if screen_data.empty:
metrics_episode_name = ["".join(metric) for metric in itertools.product(metrics_episode,episodes)] metrics_deltas_name = ["".join(metric) for metric in itertools.product(metrics_deltas,episodes)]
screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_episode_name]) screen_features = pd.DataFrame(columns=["local_date"]+["screen_" + day_segment + "_" + x for x in metrics_event + metrics_deltas_name])
else: else:
# drop consecutive duplicates of screen_status keeping the last one # drop consecutive duplicates of screen_status keeping the last one
screen_data = screen_data.loc[(screen_data[["screen_status"]].shift(-1) != screen_data[["screen_status"]]).any(axis=1)].reset_index(drop=True) screen_data = screen_data.loc[(screen_data[["screen_status"]].shift(-1) != screen_data[["screen_status"]]).any(axis=1)].reset_index(drop=True)
@@ -75,9 +75,9 @@ else:
event_features = getEventFeatures(screen_data, metrics_event) event_features = getEventFeatures(screen_data, metrics_event)
duration_features = pd.DataFrame() duration_features = pd.DataFrame()
for episode in episodes: for episode in episodes:
duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_episode)], axis=1) duration_features = pd.concat([duration_features, getEpisodeDurationFeatures(screen_deltas, episode, metrics_deltas)], axis=1)
screen_features = pd.concat([event_features, duration_features], axis = 1).fillna(0) screen_features = pd.concat([event_features, duration_features], axis = 1).fillna(0)
screen_features = screen_features.rename_axis('local_date').reset_index() screen_features = screen_features.rename_axis("local_date").reset_index()
screen_features.to_csv(snakemake.output[0], index=False) screen_features.to_csv(snakemake.output[0], index=False)