Update rapids and add regex=True.

Reformat debug_heatmap.
master
junos 2023-05-10 15:12:27 +02:00
parent 3e38b64b45
commit cd5d8b6a10
3 changed files with 40 additions and 26 deletions

View File

View File

@ -14,15 +14,9 @@
# --- # ---
# %% # %%
import os, sys
import importlib
import pandas as pd import pandas as pd
import numpy as np
# import plotly.graph_objects as go from rapids.src.features.utils.utils import chunk_episodes
from importlib import util
from pathlib import Path
import yaml
# %% # %%
phone_data_yield = pd.read_csv( phone_data_yield = pd.read_csv(
@ -36,23 +30,29 @@ time_segments_labels = pd.read_csv(
# %% # %%
phone_data_yield["assigned_segments"] = phone_data_yield[ phone_data_yield["assigned_segments"] = phone_data_yield[
"assigned_segments" "assigned_segments"
].str.replace(r"_RR\d+SS#", "#") ].str.replace(r"_RR\d+SS#", "#", regex=True)
time_segments_labels["label"] = time_segments_labels["label"].str.replace( time_segments_labels["label"] = time_segments_labels["label"].str.replace(
r"_RR\d+SS$", "" r"_RR\d+SS$", "", regex=True
) )
# %% tags=[] # %% tags=[]
def filter_data_by_segment(data, time_segment): def filter_data_by_segment(data, time_segment_current):
data.dropna(subset=["assigned_segments"], inplace=True) data.dropna(subset=["assigned_segments"], inplace=True)
if data.shape[0] == 0: # data is empty if data.shape[0] == 0: # data is empty
data["local_segment"] = data["timestamps_segment"] = None data["local_segment"] = data["timestamps_segment"] = None
return data return data
datetime_regex = "[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}" datetime_regex = (
timestamps_regex = "[0-9]{13}" r"[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}"
segment_regex = "\[({}#{},{};{},{})\]".format( )
time_segment, datetime_regex, datetime_regex, timestamps_regex, timestamps_regex timestamps_regex = r"[0-9]{13}"
segment_regex = r"\[({}#{},{};{},{})\]".format(
time_segment_current,
datetime_regex,
datetime_regex,
timestamps_regex,
timestamps_regex,
) )
data["local_segment"] = data["assigned_segments"].str.extract( data["local_segment"] = data["assigned_segments"].str.extract(
segment_regex, expand=True segment_regex, expand=True
@ -147,14 +147,17 @@ def getDataForPlot(phone_data_yield_per_segment):
.fillna(0) .fillna(0)
) )
# transpose the dataframe per local start datetime of the segment and discard the useless index layer # transpose the dataframe per local start datetime of the segment
# and discard the useless index layer
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby( phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
"local_segment_start_datetimes" "local_segment_start_datetimes"
)[["minutes_after_segment_start", "sensor"]].apply( )[["minutes_after_segment_start", "sensor"]].apply(
lambda x: x.set_index("minutes_after_segment_start").transpose() lambda x: x.set_index("minutes_after_segment_start").transpose()
) )
phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values( phone_data_yield_per_segment.index = (
"local_segment_start_datetimes" phone_data_yield_per_segment.index.get_level_values(
"local_segment_start_datetimes"
)
) )
return phone_data_yield_per_segment return phone_data_yield_per_segment
@ -227,9 +230,13 @@ phone_data_yield_per_segment.tail()
# # A workaround # # A workaround
# %% # %%
phone_data_yield_per_segment["local_segment_start_datetimes", "minutes_after_segment_start"] = phone_data_yield_per_segment[ phone_data_yield_per_segment[
"local_segment_start_datetimes", "minutes_after_segment_start"
] = phone_data_yield_per_segment[
["local_segment_start_datetimes", "minutes_after_segment_start"] ["local_segment_start_datetimes", "minutes_after_segment_start"]
].drop_duplicates(keep="first") ].drop_duplicates(
keep="first"
)
# %% # %%
phone_data_yield_per_segment.set_index( phone_data_yield_per_segment.set_index(
@ -244,8 +251,9 @@ phone_data_yield_per_segment.head()
# %% [markdown] # %% [markdown]
# # Retry # # Retry
# %% # %%
def getDataForPlot(phone_data_yield_per_segment): def get_data_for_plot(phone_data_yield_per_segment):
# calculate the length (in minutes) of each segment instance # calculate the length (in minutes) of each segment instance
phone_data_yield_per_segment["length"] = ( phone_data_yield_per_segment["length"] = (
phone_data_yield_per_segment["timestamps_segment"] phone_data_yield_per_segment["timestamps_segment"]
@ -292,7 +300,10 @@ def getDataForPlot(phone_data_yield_per_segment):
full_index, full_index,
names=("local_segment_start_datetimes", "minutes_after_segment_start"), names=("local_segment_start_datetimes", "minutes_after_segment_start"),
) )
phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"],keep="first") phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(
subset=["local_segment_start_datetimes", "minutes_after_segment_start"],
keep="first",
)
phone_data_yield_per_segment = ( phone_data_yield_per_segment = (
phone_data_yield_per_segment.set_index( phone_data_yield_per_segment.set_index(
["local_segment_start_datetimes", "minutes_after_segment_start"] ["local_segment_start_datetimes", "minutes_after_segment_start"]
@ -302,14 +313,17 @@ def getDataForPlot(phone_data_yield_per_segment):
.fillna(0) .fillna(0)
) )
# transpose the dataframe per local start datetime of the segment and discard the useless index layer # transpose the dataframe per local start datetime of the segment
# and discard the useless index layer
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby( phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
"local_segment_start_datetimes" "local_segment_start_datetimes"
)[["minutes_after_segment_start", "sensor"]].apply( )[["minutes_after_segment_start", "sensor"]].apply(
lambda x: x.set_index("minutes_after_segment_start").transpose() lambda x: x.set_index("minutes_after_segment_start").transpose()
) )
phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values( phone_data_yield_per_segment.index = (
"local_segment_start_datetimes" phone_data_yield_per_segment.index.get_level_values(
"local_segment_start_datetimes"
)
) )
return phone_data_yield_per_segment return phone_data_yield_per_segment
@ -318,6 +332,6 @@ def getDataForPlot(phone_data_yield_per_segment):
phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment) phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
# %% # %%
data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment) data_for_plot_per_segment = get_data_for_plot(phone_data_yield_per_segment)
# %% # %%

2
rapids

@ -1 +1 @@
Subproject commit 63f5a526fce4d288499168e1701adadb8b885d82 Subproject commit 059774bda10545a83ab282f59eb7a329fef9ee4c