parent
3e38b64b45
commit
cd5d8b6a10
|
@ -14,15 +14,9 @@
|
||||||
# ---
|
# ---
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
import os, sys
|
|
||||||
import importlib
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
# import plotly.graph_objects as go
|
from rapids.src.features.utils.utils import chunk_episodes
|
||||||
from importlib import util
|
|
||||||
from pathlib import Path
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
phone_data_yield = pd.read_csv(
|
phone_data_yield = pd.read_csv(
|
||||||
|
@ -36,23 +30,29 @@ time_segments_labels = pd.read_csv(
|
||||||
# %%
|
# %%
|
||||||
phone_data_yield["assigned_segments"] = phone_data_yield[
|
phone_data_yield["assigned_segments"] = phone_data_yield[
|
||||||
"assigned_segments"
|
"assigned_segments"
|
||||||
].str.replace(r"_RR\d+SS#", "#")
|
].str.replace(r"_RR\d+SS#", "#", regex=True)
|
||||||
time_segments_labels["label"] = time_segments_labels["label"].str.replace(
|
time_segments_labels["label"] = time_segments_labels["label"].str.replace(
|
||||||
r"_RR\d+SS$", ""
|
r"_RR\d+SS$", "", regex=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# %% tags=[]
|
# %% tags=[]
|
||||||
def filter_data_by_segment(data, time_segment):
|
def filter_data_by_segment(data, time_segment_current):
|
||||||
data.dropna(subset=["assigned_segments"], inplace=True)
|
data.dropna(subset=["assigned_segments"], inplace=True)
|
||||||
if data.shape[0] == 0: # data is empty
|
if data.shape[0] == 0: # data is empty
|
||||||
data["local_segment"] = data["timestamps_segment"] = None
|
data["local_segment"] = data["timestamps_segment"] = None
|
||||||
return data
|
return data
|
||||||
|
|
||||||
datetime_regex = "[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}"
|
datetime_regex = (
|
||||||
timestamps_regex = "[0-9]{13}"
|
r"[0-9]{4}[\-|\/][0-9]{2}[\-|\/][0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}"
|
||||||
segment_regex = "\[({}#{},{};{},{})\]".format(
|
)
|
||||||
time_segment, datetime_regex, datetime_regex, timestamps_regex, timestamps_regex
|
timestamps_regex = r"[0-9]{13}"
|
||||||
|
segment_regex = r"\[({}#{},{};{},{})\]".format(
|
||||||
|
time_segment_current,
|
||||||
|
datetime_regex,
|
||||||
|
datetime_regex,
|
||||||
|
timestamps_regex,
|
||||||
|
timestamps_regex,
|
||||||
)
|
)
|
||||||
data["local_segment"] = data["assigned_segments"].str.extract(
|
data["local_segment"] = data["assigned_segments"].str.extract(
|
||||||
segment_regex, expand=True
|
segment_regex, expand=True
|
||||||
|
@ -147,14 +147,17 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
.fillna(0)
|
.fillna(0)
|
||||||
)
|
)
|
||||||
|
|
||||||
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
# transpose the dataframe per local start datetime of the segment
|
||||||
|
# and discard the useless index layer
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
|
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
|
||||||
"local_segment_start_datetimes"
|
"local_segment_start_datetimes"
|
||||||
)[["minutes_after_segment_start", "sensor"]].apply(
|
)[["minutes_after_segment_start", "sensor"]].apply(
|
||||||
lambda x: x.set_index("minutes_after_segment_start").transpose()
|
lambda x: x.set_index("minutes_after_segment_start").transpose()
|
||||||
)
|
)
|
||||||
phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values(
|
phone_data_yield_per_segment.index = (
|
||||||
"local_segment_start_datetimes"
|
phone_data_yield_per_segment.index.get_level_values(
|
||||||
|
"local_segment_start_datetimes"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
return phone_data_yield_per_segment
|
return phone_data_yield_per_segment
|
||||||
|
|
||||||
|
@ -227,9 +230,13 @@ phone_data_yield_per_segment.tail()
|
||||||
# # A workaround
|
# # A workaround
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
phone_data_yield_per_segment["local_segment_start_datetimes", "minutes_after_segment_start"] = phone_data_yield_per_segment[
|
phone_data_yield_per_segment[
|
||||||
|
"local_segment_start_datetimes", "minutes_after_segment_start"
|
||||||
|
] = phone_data_yield_per_segment[
|
||||||
["local_segment_start_datetimes", "minutes_after_segment_start"]
|
["local_segment_start_datetimes", "minutes_after_segment_start"]
|
||||||
].drop_duplicates(keep="first")
|
].drop_duplicates(
|
||||||
|
keep="first"
|
||||||
|
)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
phone_data_yield_per_segment.set_index(
|
phone_data_yield_per_segment.set_index(
|
||||||
|
@ -244,8 +251,9 @@ phone_data_yield_per_segment.head()
|
||||||
# %% [markdown]
|
# %% [markdown]
|
||||||
# # Retry
|
# # Retry
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
def getDataForPlot(phone_data_yield_per_segment):
|
def get_data_for_plot(phone_data_yield_per_segment):
|
||||||
# calculate the length (in minute) of per segment instance
|
# calculate the length (in minute) of per segment instance
|
||||||
phone_data_yield_per_segment["length"] = (
|
phone_data_yield_per_segment["length"] = (
|
||||||
phone_data_yield_per_segment["timestamps_segment"]
|
phone_data_yield_per_segment["timestamps_segment"]
|
||||||
|
@ -292,7 +300,10 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
full_index,
|
full_index,
|
||||||
names=("local_segment_start_datetimes", "minutes_after_segment_start"),
|
names=("local_segment_start_datetimes", "minutes_after_segment_start"),
|
||||||
)
|
)
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(subset=["local_segment_start_datetimes", "minutes_after_segment_start"],keep="first")
|
phone_data_yield_per_segment = phone_data_yield_per_segment.drop_duplicates(
|
||||||
|
subset=["local_segment_start_datetimes", "minutes_after_segment_start"],
|
||||||
|
keep="first",
|
||||||
|
)
|
||||||
phone_data_yield_per_segment = (
|
phone_data_yield_per_segment = (
|
||||||
phone_data_yield_per_segment.set_index(
|
phone_data_yield_per_segment.set_index(
|
||||||
["local_segment_start_datetimes", "minutes_after_segment_start"]
|
["local_segment_start_datetimes", "minutes_after_segment_start"]
|
||||||
|
@ -302,14 +313,17 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
.fillna(0)
|
.fillna(0)
|
||||||
)
|
)
|
||||||
|
|
||||||
# transpose the dataframe per local start datetime of the segment and discard the useless index layer
|
# transpose the dataframe per local start datetime of the segment
|
||||||
|
# and discard the useless index layer
|
||||||
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
|
phone_data_yield_per_segment = phone_data_yield_per_segment.groupby(
|
||||||
"local_segment_start_datetimes"
|
"local_segment_start_datetimes"
|
||||||
)[["minutes_after_segment_start", "sensor"]].apply(
|
)[["minutes_after_segment_start", "sensor"]].apply(
|
||||||
lambda x: x.set_index("minutes_after_segment_start").transpose()
|
lambda x: x.set_index("minutes_after_segment_start").transpose()
|
||||||
)
|
)
|
||||||
phone_data_yield_per_segment.index = phone_data_yield_per_segment.index.get_level_values(
|
phone_data_yield_per_segment.index = (
|
||||||
"local_segment_start_datetimes"
|
phone_data_yield_per_segment.index.get_level_values(
|
||||||
|
"local_segment_start_datetimes"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
return phone_data_yield_per_segment
|
return phone_data_yield_per_segment
|
||||||
|
|
||||||
|
@ -318,6 +332,6 @@ def getDataForPlot(phone_data_yield_per_segment):
|
||||||
phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
|
phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment)
|
data_for_plot_per_segment = get_data_for_plot(phone_data_yield_per_segment)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
|
2
rapids
2
rapids
|
@ -1 +1 @@
|
||||||
Subproject commit 63f5a526fce4d288499168e1701adadb8b885d82
|
Subproject commit 059774bda10545a83ab282f59eb7a329fef9ee4c
|
Loading…
Reference in New Issue