diff --git a/.idea/misc.xml b/.idea/misc.xml
index 8962e54..b6b261d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,4 +4,17 @@
+
+
+
\ No newline at end of file
diff --git a/exploration/debug_heatmap.py b/exploration/debug_heatmap.py
index e7df82f..1e5663b 100644
--- a/exploration/debug_heatmap.py
+++ b/exploration/debug_heatmap.py
@@ -227,7 +227,7 @@ phone_data_yield_per_segment.tail()
# # A workaround
# %%
-phone_data_yield_per_segment = phone_data_yield_per_segment[
-    ["local_segment_start_datetimes", "minutes_after_segment_start"]
-].drop_duplicates(keep="first")
+# Keep the first row for each (segment start, minute offset) pair while
+# retaining every other column. Writing
+# df["local_segment_start_datetimes", "minutes_after_segment_start"] = ...
+# would subscript with a tuple (a single column label), not with two columns.
+phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(
+    subset=["local_segment_start_datetimes", "minutes_after_segment_start"],
+    keep="first",
+)
@@ -240,4 +240,84 @@ phone_data_yield_per_segment.set_index(
# %%
phone_data_yield_per_segment.head()
+
+# %% [markdown]
+# # Retry
+
+# %%
+def getDataForPlot(phone_data_yield_per_segment):  # reshape per-minute rows into one row per segment start with one column per minute offset; returns that DataFrame
+ # Segment length in whole minutes. NOTE: this assignment adds a "length" column to the DataFrame object passed in by the caller.
+ phone_data_yield_per_segment["length"] = (
+ phone_data_yield_per_segment["timestamps_segment"]  # comma-separated text; presumably "start,end" in epoch milliseconds - TODO confirm
+ .str.split(",")
+ .apply(lambda x: int((int(x[1]) - int(x[0])) / (1000 * 60)))  # (field 1 - field 0) / 60000, truncated by int()
+ )
+ # Collapse to one row per (segment, length, date, hour, minute), keeping the max of "sensor" and "local_date_time".
+ phone_data_yield_per_segment = (
+ phone_data_yield_per_segment.groupby(
+ ["local_segment", "length", "local_date", "local_hour", "local_minute"]
+ )[["sensor", "local_date_time"]]
+ .max()  # NOTE(review): previously described as "the number of sensors logged at least one row per minute"; presumably "sensor" already holds that count - confirm
+ .reset_index()
+ )
+ # Parse the segment's local start datetime: the second "#"-separated field of "local_segment", up to its first ",".
+ phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime(
+ phone_data_yield_per_segment["local_segment"].apply(
+ lambda x: x.split("#")[1].split(",")[0]
+ )
+ )
+ # Minutes elapsed between each row's "local_date_time" and its segment start, cast to int.
+ phone_data_yield_per_segment["minutes_after_segment_start"] = (
+ (
+ phone_data_yield_per_segment["local_date_time"]  # assumes this column is already datetime-like so the subtraction yields timedeltas - TODO confirm
+ - phone_data_yield_per_segment["local_segment_start_datetimes"]
+ )
+ / pd.Timedelta(minutes=1)
+ ).astype("int")
+
+ # Impute missing minutes with 0: first list every (segment start, minute offset) pair each segment should contain.
+ columns_for_full_index = phone_data_yield_per_segment[
+ ["local_segment_start_datetimes", "length"]
+ ].drop_duplicates(keep="first")
+ columns_for_full_index = columns_for_full_index.apply(
+ lambda row: [
+ [row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1)  # offsets run 0..length inclusive
+ ],
+ axis=1,
+ )
+ full_index = []
+ for columns in columns_for_full_index:
+ full_index = full_index + columns  # NOTE(review): builds a new list on every pass; list.extend or itertools.chain.from_iterable would avoid the repeated copying
+ full_index = pd.MultiIndex.from_tuples(
+ full_index,
+ names=("local_segment_start_datetimes", "minutes_after_segment_start"),
+ )
+ phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"],keep="first")  # one row per (start, offset); presumably so the reindex below sees unique index entries - confirm
+ phone_data_yield_per_segment = (
+ phone_data_yield_per_segment.set_index(
+ ["local_segment_start_datetimes", "minutes_after_segment_start"]
+ )
+ .reindex(full_index)  # adds a row for every expected pair that has no data
+ .reset_index()
+ .fillna(0)  # fills NaN in every column of the frame, not only "sensor"
+ )
+
+ # Per segment start: make the minute offsets the columns (the "sensor" values become the single row), then keep only the segment-start level of the index.
+ phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
+ "local_segment_start_datetimes"
+ )[["minutes_after_segment_start", "sensor"]].apply(
+ lambda x: x.set_index("minutes_after_segment_start").transpose()
+ )
+ phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values(
+ "local_segment_start_datetimes"
+ )
+ return phone_data_yield_per_segment
+
+
+# %%
+phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)  # filter_data_by_segment, phone_data_yield and time_segment are defined outside this hunk
+
+# %%
+data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment)  # NOTE: getDataForPlot also adds a "length" column to the frame passed in
+
# %%
diff --git a/rapids b/rapids
index d2ed73d..e5cc025 160000
--- a/rapids
+++ b/rapids
@@ -1 +1 @@
-Subproject commit d2ed73dccfac65ce503c1b510182fe5ef1516508
+Subproject commit e5cc02501f629c96641dfd1bcd1f7fcfd0d55462