From 7e8e922d71429ee9792cc2db1cdb6feb54c42317 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 15 Dec 2021 18:25:53 +0100 Subject: [PATCH] Debugging of RAPIDS. --- .idea/misc.xml | 13 ++++++ exploration/debug_heatmap.py | 82 +++++++++++++++++++++++++++++++++++- rapids | 2 +- 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 8962e54..b6b261d 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -4,4 +4,17 @@ + + + \ No newline at end of file diff --git a/exploration/debug_heatmap.py b/exploration/debug_heatmap.py index e7df82f..1e5663b 100644 --- a/exploration/debug_heatmap.py +++ b/exploration/debug_heatmap.py @@ -227,7 +227,7 @@ phone_data_yield_per_segment.tail() # # A workaround # %% -phone_data_yield_per_segment = phone_data_yield_per_segment[ +phone_data_yield_per_segment["local_segment_start_datetimes", "minutes_after_segment_start"] = phone_data_yield_per_segment[ ["local_segment_start_datetimes", "minutes_after_segment_start"] ].drop_duplicates(keep="first") @@ -240,4 +240,84 @@ phone_data_yield_per_segment.set_index( # %% phone_data_yield_per_segment.head() + +# %% [markdown] +# # Retry + +# %% +def getDataForPlot(phone_data_yield_per_segment): + # calculate the length (in minute) of per segment instance + phone_data_yield_per_segment["length"] = ( + phone_data_yield_per_segment["timestamps_segment"] + .str.split(",") + .apply(lambda x: int((int(x[1]) - int(x[0])) / (1000 * 60))) + ) + # calculate the number of sensors logged at least one row of data per minute. + phone_data_yield_per_segment = ( + phone_data_yield_per_segment.groupby( + ["local_segment", "length", "local_date", "local_hour", "local_minute"] + )[["sensor", "local_date_time"]] + .max() + .reset_index() + ) + # extract local start datetime of the segment from "local_segment" column + phone_data_yield_per_segment["local_segment_start_datetimes"] = pd.to_datetime( + phone_data_yield_per_segment["local_segment"].apply( + lambda x: x.split("#")[1].split(",")[0] + ) + ) + # calculate the number of minutes after local start datetime of the segment + phone_data_yield_per_segment["minutes_after_segment_start"] = ( + ( + phone_data_yield_per_segment["local_date_time"] + - phone_data_yield_per_segment["local_segment_start_datetimes"] + ) + / pd.Timedelta(minutes=1) + ).astype("int") + + # impute missing rows with 0 + columns_for_full_index = phone_data_yield_per_segment[ + ["local_segment_start_datetimes", "length"] + ].drop_duplicates(keep="first") + columns_for_full_index = columns_for_full_index.apply( + lambda row: [ + [row["local_segment_start_datetimes"], x] for x in range(row["length"] + 1) + ], + axis=1, + ) + full_index = [] + for columns in columns_for_full_index: + full_index = full_index + columns + full_index = pd.MultiIndex.from_tuples( + full_index, + names=("local_segment_start_datetimes", "minutes_after_segment_start"), + ) + phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"],keep="first") + phone_data_yield_per_segment = ( + phone_data_yield_per_segment.set_index( + ["local_segment_start_datetimes", "minutes_after_segment_start"] + ) + .reindex(full_index) + .reset_index() + .fillna(0) + ) + + # transpose the dataframe per local start datetime of the segment and discard the useless index layer + phone_data_yield_per_segment = phone_data_yield_per_segment.groupby( + "local_segment_start_datetimes" + )[["minutes_after_segment_start", "sensor"]].apply( + lambda x: x.set_index("minutes_after_segment_start").transpose() + ) + phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values( + "local_segment_start_datetimes" + ) + return phone_data_yield_per_segment + + +# %% +phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment) + +# %% +data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment) + # %% diff --git a/rapids b/rapids index d2ed73d..e5cc025 160000 --- a/rapids +++ b/rapids @@ -1 +1 @@ -Subproject commit d2ed73dccfac65ce503c1b510182fe5ef1516508 +Subproject commit e5cc02501f629c96641dfd1bcd1f7fcfd0d55462