Debugging of RAPIDS.
parent
c4aacfffe1
commit
7e8e922d71
|
@ -4,4 +4,17 @@
|
|||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
<component name="RMarkdownSettings">
|
||||
<option name="renderProfiles">
|
||||
<map>
|
||||
<entry key="file://$PROJECT_DIR$/rapids/src/visualization/merge_heatmap_sensors_per_minute_per_time_segment.Rmd">
|
||||
<value>
|
||||
<RMarkdownRenderProfile>
|
||||
<option name="outputDirectoryUrl" value="file://$PROJECT_DIR$/rapids/src/visualization" />
|
||||
</RMarkdownRenderProfile>
|
||||
</value>
|
||||
</entry>
|
||||
</map>
|
||||
</option>
|
||||
</component>
|
||||
</project>
|
|
@ -227,7 +227,7 @@ phone_data_yield_per_segment.tail()
|
|||
# # A workaround
|
||||
|
||||
# %%
|
||||
phone_data_yield_per_segment = phone_data_yield_per_segment[
|
||||
phone_data_yield_per_segment["local_segment_start_datetimes", "minutes_after_segment_start"] = phone_data_yield_per_segment[
|
||||
["local_segment_start_datetimes", "minutes_after_segment_start"]
|
||||
].drop_duplicates(keep="first")
|
||||
|
||||
|
@ -240,4 +240,84 @@ phone_data_yield_per_segment.set_index(
|
|||
# %%
|
||||
phone_data_yield_per_segment.head()
|
||||
|
||||
|
||||
# %% [markdown]
|
||||
# # Retry
|
||||
|
||||
# %%
|
||||
def getDataForPlot(phone_data_yield_per_segment: "pd.DataFrame") -> "pd.DataFrame":
    """Build the segment-by-minute sensor matrix from per-row phone data yield.

    The input frame is read only; unlike the earlier version, no "length"
    column is written back onto the caller's DataFrame.

    Parameters
    ----------
    phone_data_yield_per_segment : pandas.DataFrame
        Columns read by this function:

        * ``timestamps_segment`` - string with two comma-separated integers;
          their difference divided by ``1000 * 60`` is taken as the segment
          length in minutes.
        * ``local_segment`` - string; the text between the first ``#`` and the
          following ``,`` is parsed with ``pd.to_datetime`` as the segment's
          local start datetime.
        * ``local_date``, ``local_hour``, ``local_minute`` - grouping keys.
        * ``sensor`` - value aggregated with ``max`` per group.
        * ``local_date_time`` - expected to be datetime-like, since the
          segment start datetime is subtracted from it.

    Returns
    -------
    pandas.DataFrame
        One row per segment instance, indexed by
        ``local_segment_start_datetimes``. Columns are
        ``minutes_after_segment_start`` values from 0 to the segment length
        inclusive. Cells hold the ``sensor`` value for that minute, or 0 where
        the minute had no row.
    """
    index_columns = ["local_segment_start_datetimes", "minutes_after_segment_start"]

    # Length (in minutes) of each segment instance. The float division followed
    # by int() truncates toward zero, as before.
    segment_length = (
        phone_data_yield_per_segment["timestamps_segment"]
        .str.split(",")
        .apply(lambda x: int((int(x[1]) - int(x[0])) / (1000 * 60)))
    )
    # assign() returns a new frame, so the caller's DataFrame is left untouched
    # (and no SettingWithCopyWarning is raised when the input is a slice).
    data = phone_data_yield_per_segment.assign(length=segment_length)

    # Collapse to one row per segment instance and minute, keeping the maximum
    # "sensor" and "local_date_time" of each group.
    per_minute = (
        data.groupby(
            ["local_segment", "length", "local_date", "local_hour", "local_minute"]
        )[["sensor", "local_date_time"]]
        .max()
        .reset_index()
    )

    # Extract the local start datetime of the segment from "local_segment".
    per_minute["local_segment_start_datetimes"] = pd.to_datetime(
        per_minute["local_segment"].apply(lambda x: x.split("#")[1].split(",")[0])
    )
    # Whole minutes elapsed since the local start datetime of the segment.
    per_minute["minutes_after_segment_start"] = (
        (per_minute["local_date_time"] - per_minute["local_segment_start_datetimes"])
        / pd.Timedelta(minutes=1)
    ).astype("int")

    # Full (segment start, minute) grid: every minute from 0 to the segment
    # length inclusive, for every distinct (start, length) pair. Built in one
    # pass instead of repeated list concatenation.
    segments = per_minute[["local_segment_start_datetimes", "length"]].drop_duplicates(
        keep="first"
    )
    full_index = pd.MultiIndex.from_tuples(
        [
            (start, minute)
            for start, length in zip(
                segments["local_segment_start_datetimes"], segments["length"]
            )
            for minute in range(length + 1)
        ],
        names=index_columns,
    )

    # Impute missing minutes with 0. Only the columns used below are kept, so
    # fillna(0) no longer writes 0 into unrelated datetime/string columns.
    per_minute = (
        per_minute[index_columns + ["sensor"]]
        .drop_duplicates(subset=index_columns, keep="first")
        .set_index(index_columns)
        .reindex(full_index)
        .reset_index()
        .fillna(0)
    )

    # Transpose per segment start so that minutes become columns, then discard
    # the inner index level produced by the transpose (the "sensor" label).
    data_for_plot = per_minute.groupby("local_segment_start_datetimes")[
        ["minutes_after_segment_start", "sensor"]
    ].apply(lambda x: x.set_index("minutes_after_segment_start").transpose())
    data_for_plot.index = data_for_plot.index.get_level_values(
        "local_segment_start_datetimes"
    )
    return data_for_plot
|
||||
|
||||
|
||||
# %%
|
||||
phone_data_yield_per_segment = filter_data_by_segment(phone_data_yield, time_segment)
|
||||
|
||||
# %%
|
||||
data_for_plot_per_segment = getDataForPlot(phone_data_yield_per_segment)
|
||||
|
||||
# %%
|
||||
|
|
2
rapids
2
rapids
|
@ -1 +1 @@
|
|||
Subproject commit d2ed73dccfac65ce503c1b510182fe5ef1516508
|
||||
Subproject commit e5cc02501f629c96641dfd1bcd1f7fcfd0d55462
|
Loading…
Reference in New Issue