From 2720b24c0ae2c315a72914c7326d009c884abef2 Mon Sep 17 00:00:00 2001 From: Meng Li <34143965+Meng6@users.noreply.github.com> Date: Thu, 30 Apr 2020 10:49:49 -0400 Subject: [PATCH] Make screen_features.py script compatible with both Pandas 0.25.2 and 1.0.3. --- src/features/screen_features.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/features/screen_features.py b/src/features/screen_features.py index f84f2d0d..e56ef0c0 100644 --- a/src/features/screen_features.py +++ b/src/features/screen_features.py @@ -9,22 +9,22 @@ def getEpisodeDurationFeatures(screen_deltas, episode, features, phone_sensed_bi screen_deltas_episode = screen_deltas[screen_deltas["episode"] == episode] duration_helper = pd.DataFrame() if "countepisode" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).count()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_countepisode" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).count().rename(columns = {"time_diff": "screen_" + day_segment + "_countepisode" + episode})], axis = 1) if "episodepersensedminutes" in features: - for date, row in screen_deltas_episode.groupby(["local_start_date"]).count()[["time_diff"]].iterrows(): + for date, row in screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).count().iterrows(): sensed_minutes = phone_sensed_bins.loc[date, :].sum() * bin_size episode_per_sensedminutes = row["time_diff"] / (1 if sensed_minutes == 0 else sensed_minutes) duration_helper.loc[date, "screen_" + day_segment + "_episodepersensedminutes" + episode] = episode_per_sensedminutes if "sumduration" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).sum()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_sumduration" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).sum().rename(columns = {"time_diff": "screen_" + day_segment + "_sumduration" + episode})], axis = 1) if "maxduration" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).max()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_maxduration" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).max().rename(columns = {"time_diff": "screen_" + day_segment + "_maxduration" + episode})], axis = 1) if "minduration" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).min()[["time_diff"]].rename(columns = {"time_diff": "screen_" + day_segment + "_minduration" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).min().rename(columns = {"time_diff": "screen_" + day_segment + "_minduration" + episode})], axis = 1) if "avgduration" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).mean()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + "_avgduration" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).mean().rename(columns = {"time_diff":"screen_" + day_segment + "_avgduration" + episode})], axis = 1) if "stdduration" in features: - duration_helper = pd.concat([duration_helper, screen_deltas_episode.groupby(["local_start_date"]).std()[["time_diff"]].rename(columns = {"time_diff":"screen_" + day_segment + "_stdduration" + episode})], axis = 1) + duration_helper = pd.concat([duration_helper, screen_deltas_episode[["time_diff"]].groupby(["local_start_date"]).std().rename(columns = {"time_diff":"screen_" + day_segment + "_stdduration" + episode})], axis = 1) if "firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) in features: duration_helper = pd.concat([duration_helper, pd.DataFrame(screen_deltas_episode.groupby(["local_start_date"]).first()[["local_start_date_time"]].local_start_date_time.apply(lambda x: (x.to_pydatetime().hour - reference_hour_first_use) * 3600 + x.to_pydatetime().minute * 60 + x.to_pydatetime().second)).rename(columns = {"local_start_date_time":"screen_" + day_segment + "_firstuseafter" + "{0:0=2d}".format(reference_hour_first_use) + episode})], axis = 1) return duration_helper