2019-11-27 20:25:17 +01:00
import pandas as pd
2019-12-04 18:04:20 +01:00
import itertools
2020-06-04 00:55:36 +02:00
from screen . screen_base import base_screen_features
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Load the screen episode table from the "screen_deltas" Snakemake input and
# parse its four local start/end date(-time) columns as datetimes.
# `snakemake` is not defined in this file; presumably it is injected by
# Snakemake's script runner.
# NOTE(review): the space padding inside the string literals looks like an
# extraction artifact; it is kept verbatim here -- confirm against upstream.
screen_date_columns = [
    " local_start_date_time ",
    " local_end_date_time ",
    " local_start_date ",
    " local_end_date ",
]
screen_data = pd.read_csv(
    snakemake.input[" screen_deltas "],
    parse_dates=screen_date_columns,
)
2019-12-04 22:04:37 +01:00
# Load the phone-sensed bins table, indexed by its (parsed) local date column.
phone_sensed_bins = pd.read_csv(
    snakemake.input[" phone_sensed_bins "],
    parse_dates=[" local_date "],
    index_col=" local_date ",
)
# Binarise in place: every cell greater than 0 becomes 1, other cells are
# left as they are.
phone_sensed_bins[phone_sensed_bins > 0] = 1
2019-11-27 20:25:17 +01:00
# Day segment requested for this run, taken from the rule's params; it is
# handed unchanged to base_screen_features further down.
day_segment = snakemake.params[" day_segment "]
2020-06-04 00:55:36 +02:00
# Start from an empty frame that only carries the merge key column, so the
# computed features can be outer-merged onto it later.
screen_features = pd.DataFrame(
    columns=[" local_date "],
)
2020-03-04 18:21:36 +01:00
2020-07-23 20:38:59 +02:00
# Build the list of delta feature names requested in params. The generic
# "firstuseafter" entry is expanded with the configured reference hour,
# formatted as a zero-padded two-digit number; every other name is kept as is.
# The reference hour is only looked up when such an entry is present.
# NOTE(review): the space padding inside the string literals looks like an
# extraction artifact; it is kept verbatim here -- confirm against upstream.
requested_features_deltas = []
for delta_name in snakemake.params[" features_deltas "]:
    if delta_name == " firstuseafter ":
        reference_hour = snakemake.params[" reference_hour_first_use "]
        delta_name = " firstuseafter " + " {0:0=2d} ".format(reference_hour)
    requested_features_deltas.append(delta_name)

# One expected output name per (delta feature, episode type) pair: the two
# parts of each pair are joined into a single string.
requested_features = list(
    map(
        " ".join,
        itertools.product(
            requested_features_deltas,
            snakemake.params[" episode_types "],
        ),
    )
)
2019-11-27 20:25:17 +01:00
2020-07-23 20:38:59 +02:00
# Compute the features with the shared helper (imported from
# screen.screen_base), then outer-merge them onto the frame on the date key so
# that rows from either side are kept.
computed_features = base_screen_features(
    screen_data,
    phone_sensed_bins,
    day_segment,
    snakemake.params,
)
screen_features = screen_features.merge(
    computed_features,
    on=" local_date ",
    how=" outer ",
)
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Sanity check on the output width: one column per requested feature plus one
# extra column (presumably the date key used for the merge).
# The error is raised explicitly instead of through an `assert` statement so
# the check still runs when Python is started with -O / PYTHONOPTIMIZE, which
# strips assert statements. Exception type and message are unchanged.
expected_feature_count = len(requested_features)
actual_column_count = screen_features.shape[1]
if expected_feature_count + 1 != actual_column_count:
    raise AssertionError(
        " The number of features in the output dataframe (= "
        + str(actual_column_count)
        + " ) does not match the expected value (= "
        + str(expected_feature_count)
        + " + 1). Verify your screen feature extraction functions "
    )
2019-11-27 20:25:17 +01:00
2020-06-04 00:55:36 +02:00
# Write the feature table to the rule's first declared output as CSV, without
# the row index.
output_path = snakemake.output[0]
screen_features.to_csv(output_path, index=False)