30 lines
1.5 KiB
Python
30 lines
1.5 KiB
Python
import pandas as pd
|
|
|
|
def extract_second_order_features(intraday_features, so_features_names):
    """Aggregate intraday features per ``local_segment``.

    The ``level_1`` column is dropped, the remaining columns are grouped by
    ``local_segment``, and one set of columns is produced for every requested
    statistic. Each produced column is named ``<original>_SO_<statistic>``.

    Args:
        intraday_features: DataFrame containing at least the columns
            ``local_segment`` and ``level_1``.
        so_features_names: Container of requested statistics. Recognised
            names are ``"mean"``, ``"median"``, ``"sd"``, ``"max"`` and
            ``"min"``; anything else is ignored.

    Returns:
        A DataFrame with a ``local_segment`` column followed by the
        aggregated columns, always ordered mean, median, sd, max, min
        regardless of the order of ``so_features_names``. If
        ``intraday_features`` is empty, or no recognised statistic is
        requested, an empty DataFrame with only a ``local_segment`` column
        is returned.
    """
    if intraday_features.empty:
        return pd.DataFrame(columns=["local_segment"])

    # (requested name, GroupBy method) pairs, in the fixed output order.
    aggregations = (
        ("mean", "mean"),
        ("median", "median"),
        ("sd", "std"),
        ("max", "max"),
        ("min", "min"),
    )
    requested = [
        (label, method)
        for label, method in aggregations
        if label in so_features_names
    ]
    if not requested:
        # Nothing to compute: keep the same shape as the empty-input case
        # instead of returning a frame with a stray "index" column.
        return pd.DataFrame(columns=["local_segment"])

    # Group once; every statistic is computed from the same grouping.
    grouped = intraday_features.drop("level_1", axis=1).groupby(["local_segment"])

    per_statistic = [
        getattr(grouped, method)().add_suffix("_SO_" + label)
        for label, method in requested
    ]

    # All frames share the same "local_segment" index, so a single
    # column-wise concat lines them up and keeps the index name.
    return pd.concat(per_statistic, axis=1).reset_index()
|
|
|
|
def get_sample_rate(data):
    """Return the mean sample rate of ``data`` as an integer.

    The rate is computed as ``1000 / mean(diff(data['timestamp']))`` and
    truncated with ``int``.
    NOTE(review): the factor 1000 suggests timestamps are in milliseconds
    and the result is in Hz -- confirm against the callers.

    Args:
        data: Object indexable by ``'timestamp'`` whose value supports
            ``diff().dropna().mean()`` (e.g. a pandas DataFrame).

    Returns:
        ``int(1000 / mean timestamp difference)``.

    Raises:
        Exception: If the mean timestamp difference cannot be computed
            (for example, the ``timestamp`` column is missing). The
            underlying error is attached as ``__cause__``.
        ValueError: If there are fewer than two timestamps, so no mean
            difference exists.
    """
    try:
        timestamps_diff = data['timestamp'].diff().dropna().mean()
    except Exception as err:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate; chain the cause for easier debugging.
        raise Exception("Error occured while trying to get the mean sample rate from the data.") from err

    if pd.isna(timestamps_diff):
        # Zero or one sample: diff() leaves nothing to average.
        raise ValueError("Cannot compute a sample rate from fewer than two timestamps.")

    return int(1000 / timestamps_diff)