diff --git a/.flake b/.flake8 similarity index 100% rename from .flake rename to .flake8 diff --git a/exploration/debug_heatmap.py b/exploration/debug_heatmap.py index 1e5663b..401faf0 100644 --- a/exploration/debug_heatmap.py +++ b/exploration/debug_heatmap.py @@ -14,15 +14,9 @@ # --- # %% -import os, sys -import importlib import pandas as pd -import numpy as np -# import plotly.graph_objects as go -from importlib import util -from pathlib import Path -import yaml +from rapids.src.features.utils.utils import chunk_episodes # %% phone_data_yield = pd.read_csv( @@ -36,23 +30,29 @@ time_segments_labels = pd.read_csv( # %% phone_data_yield["assigned_segments"] = phone_data_yield[ "assigned_segments" -].str.replace(r"_RR\d+SS#", "#") +].str.replace(r"_RR\d+SS#", "#", regex=True) time_segments_labels["label"] = time_segments_labels["label"].str.replace( - r"_RR\d+SS$", "" + r"_RR\d+SS$", "", regex=True ) # %% tags=[] -def filter_data_by_segment(data, time_segment): +def filter_data_by_segment(data, time_segment_current): data.dropna(subset=["assigned_segments"], inplace=True) if data.shape[0] == 0: # data is empty data["local_segment"] = data["timestamps_segment"] = None return data - datetime_regex = "[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}" - timestamps_regex = "[0-9]{13}" - segment_regex = "\[({}#{},{};{},{})\]".format( - time_segment, datetime_regex, datetime_regex, timestamps_regex, timestamps_regex + datetime_regex = ( + r"[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}" + ) + timestamps_regex = r"[0-9]{13}" + segment_regex = r"\[({}#{},{};{},{})\]".format( + time_segment_current, + datetime_regex, + datetime_regex, + timestamps_regex, + timestamps_regex, ) data["local_segment"] = data["assigned_segments"].str.extract( segment_regex, expand=True @@ -147,14 +147,17 @@ def getDataForPlot(phone_data_yield_per_segment): .fillna(0) ) - # transpose the dataframe per local start datetime of the segment and discard the useless index layer + # transpose the dataframe per local start datetime of the segment + # and discard the useless index layer phone_data_yield_per_segment = phone_data_yield_per_segment.groupby( "local_segment_start_datetimes" )[["minutes_after_segment_start", "sensor"]].apply( lambda x: x.set_index("minutes_after_segment_start").transpose() ) - phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values( - "local_segment_start_datetimes" + phone_data_yield_per_segment.index = ( + phone_data_yield_per_segment.index.get_level_values( + "local_segment_start_datetimes" + ) ) return phone_data_yield_per_segment @@ -227,9 +230,13 @@ phone_data_yield_per_segment.tail() # # A workaround # %% -phone_data_yield_per_segment["local_segment_start_datetimes", "minutes_after_segment_start"] = phone_data_yield_per_segment[ +phone_data_yield_per_segment[ + "local_segment_start_datetimes", "minutes_after_segment_start" +] = phone_data_yield_per_segment[ ["local_segment_start_datetimes", "minutes_after_segment_start"] -].drop_duplicates(keep="first") +].drop_duplicates( + keep="first" +) # %% phone_data_yield_per_segment.set_index( @@ -244,8 +251,9 @@ phone_data_yield_per_segment.head() # %% [markdown] # # Retry + # %% -def getDataForPlot(phone_data_yield_per_segment): +def get_data_for_plot(phone_data_yield_per_segment): # calculate the length (in minute) of per segment instance phone_data_yield_per_segment["length"] = ( phone_data_yield_per_segment["timestamps_segment"] @@ -292,7 +300,10 @@ def getDataForPlot(phone_data_yield_per_segment): full_index, names=("local_segment_start_datetimes", "minutes_after_segment_start"), ) - phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"],keep="first") + phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates( + subset=["local_segment_start_datetimes", "minutes_after_segment_start"], + keep="first", + ) phone_data_yield_per_segment = ( phone_data_yield_per_segment.set_index( ["local_segment_start_datetimes", "minutes_after_segment_start"] @@ -302,14 +313,17 @@ def getDataForPlot(phone_data_yield_per_segment): .fillna(0) ) - # transpose the dataframe per local start datetime of the segment and discard the useless index layer + # transpose the dataframe per local start datetime of the segment + # and discard the useless index layer phone_data_yield_per_segment = phone_data_yield_per_segment.groupby( "local_segment_start_datetimes" )[["minutes_after_segment_start", "sensor"]].apply( lambda x: x.set_index("minutes_after_segment_start").transpose() ) - phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values( - "local_segment_start_datetimes" + phone_data_yield_per_segment.index = ( + phone_data_yield_per_segment.index.get_level_values( + "local_segment_start_datetimes" + ) ) return phone_data_yield_per_segment @@ -318,6 +332,6 @@ def getDataForPlot(phone_data_yield_per_segment): phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment) # %% -data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment) +data_for_plot_per_segment = get_data_for_plot(phone_data_yield_per_segment) # %% diff --git a/rapids b/rapids index 63f5a52..059774b 160000 --- a/rapids +++ b/rapids @@ -1 +1 @@ -Subproject commit 63f5a526fce4d288499168e1701adadb8b885d82 +Subproject commit 059774bda10545a83ab282f59eb7a329fef9ee4c