rapids/src/features/screen_features.py

19 lines
1.3 KiB
Python
Raw Normal View History

2019-11-27 20:25:17 +01:00
import pandas as pd
import itertools
2020-06-04 00:55:36 +02:00
from screen.screen_base import base_screen_features
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Screen episode deltas: the four local start/end columns are parsed as datetimes.
screen_data = pd.read_csv(
    snakemake.input["screen_deltas"],
    parse_dates=[
        "local_start_date_time",
        "local_end_date_time",
        "local_start_date",
        "local_end_date",
    ],
)
# Phone-sensed bins, indexed by their parsed "local_date" column.
phone_sensed_bins = pd.read_csv(
    snakemake.input["phone_sensed_bins"],
    parse_dates=["local_date"],
    index_col="local_date",
)
# Collapse every positive cell to 1; zero, negative and missing cells are left untouched.
phone_sensed_bins[phone_sensed_bins.gt(0)] = 1
2019-11-27 20:25:17 +01:00
# Day segment to compute features for, taken from the Snakemake rule parameters.
day_segment = snakemake.params["day_segment"]
2020-06-04 00:55:36 +02:00
# Start from an empty frame that only declares the "local_date" column;
# the computed features are merged onto it on that column further down.
screen_features = pd.DataFrame(columns=["local_date"])
2020-07-23 20:38:59 +02:00
# Resolve the configured delta feature names. "firstuseafter" gets the reference hour
# appended as a zero-padded two-digit suffix (e.g. "firstuseafter06"); every other name
# is used unchanged. The reference hour is only looked up when "firstuseafter" is requested.
requested_features_deltas = [
    feature_name
    if feature_name != "firstuseafter"
    else "firstuseafter" + "{:02d}".format(snakemake.params["reference_hour_first_use"])
    for feature_name in snakemake.params["features_deltas"]
]
# One requested feature per (delta feature, episode type) pair, named by concatenating the two.
requested_features = list(
    map("".join, itertools.product(requested_features_deltas, snakemake.params["episode_types"]))
)
2019-11-27 20:25:17 +01:00
2020-07-23 20:38:59 +02:00
# Compute the base screen features, then outer-join them onto the (initially empty)
# output frame using "local_date" as the key.
base_features = base_screen_features(screen_data, phone_sensed_bins, day_segment, snakemake.params)
screen_features = pd.merge(screen_features, base_features, on="local_date", how="outer")
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Sanity check on the output: one column per requested feature plus one extra
# column (the "local_date" key the features were merged on).
# Raised explicitly rather than with `assert` so the check still runs when Python is
# started with -O, which strips assert statements. AssertionError and the message text
# are kept so the failure looks the same as before.
if len(requested_features) + 1 != screen_features.shape[1]:
    raise AssertionError(
        "The number of features in the output dataframe (={}) does not match the expected value (={} + 1). "
        "Verify your screen feature extraction functions".format(screen_features.shape[1], len(requested_features))
    )
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Write the feature table to the rule's first output file, without the DataFrame index.
screen_features.to_csv(snakemake.output[0], index=False)