Add screen episode filter parameters

pull/95/head
JulioV 2020-07-23 14:38:59 -04:00
parent 587f20dc58
commit 5f771618ae
5 changed files with 27 additions and 20 deletions

View File

@ -91,7 +91,7 @@ DORYAB_LOCATION:
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_GAP_ALLOWED: 300 MAXIMUM_GAP_ALLOWED: 300
MINUTES_DATA_USED: True MINUTES_DATA_USED: False
BLUETOOTH: BLUETOOTH:
COMPUTE: False COMPUTE: False
@ -118,6 +118,8 @@ SCREEN:
DB_TABLE: screen DB_TABLE: screen
DAY_SEGMENTS: *day_segments DAY_SEGMENTS: *day_segments
REFERENCE_HOUR_FIRST_USE: 0 REFERENCE_HOUR_FIRST_USE: 0
IGNORE_EPISODES_SHORTER_THAN: 0 # in minutes, set to 0 to disable
IGNORE_EPISODES_LONGER_THAN: 0 # in minutes, set to 0 to disable
FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] FEATURES_DELTAS: ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
EPISODE_TYPES: ["unlock"] EPISODE_TYPES: ["unlock"]

View File

@ -740,14 +740,16 @@ See `Screen Config Code`_
**Screen Rule Parameters (screen_features):** **Screen Rule Parameters (screen_features):**
========================= =================== ============================ ===================
Name Description Name Description
========================= =================== ============================ ===================
day_segment The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night`` day_segment The particular ``day_segments`` that will be analyzed. The available options are ``daily``, ``morning``, ``afternoon``, ``evening``, ``night``
reference_hour_first_use The reference point from which ``firstuseafter`` is to be computed; the default is midnight reference_hour_first_use The reference point from which ``firstuseafter`` is to be computed; the default is midnight
features_deltas Features to be computed, see table below ignore_episodes_shorter_than Ignore episodes that are shorter than this threshold (minutes). Set to 0 to disable this filter.
episode_types Currently we only support unlock episodes (from when the phone is unlocked until the screen is off) ignore_episodes_longer_than Ignore episodes that are longer than this threshold (minutes). Set to 0 to disable this filter.
========================= =================== features_deltas Features to be computed, see table below
episode_types Currently we only support unlock episodes (from when the phone is unlocked until the screen is off)
============================ ===================
.. _screen-episodes-available-features: .. _screen-episodes-available-features:

View File

@ -179,6 +179,8 @@ rule screen_features:
reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"], reference_hour_first_use = config["SCREEN"]["REFERENCE_HOUR_FIRST_USE"],
features_deltas = config["SCREEN"]["FEATURES_DELTAS"], features_deltas = config["SCREEN"]["FEATURES_DELTAS"],
episode_types = config["SCREEN"]["EPISODE_TYPES"], episode_types = config["SCREEN"]["EPISODE_TYPES"],
ignore_episodes_shorter_than = config["SCREEN"]["IGNORE_EPISODES_SHORTER_THAN"],
ignore_episodes_longer_than = config["SCREEN"]["IGNORE_EPISODES_LONGER_THAN"],
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"] bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
output: output:
"data/processed/{pid}/screen_{day_segment}.csv" "data/processed/{pid}/screen_{day_segment}.csv"

View File

@ -44,8 +44,10 @@ def base_screen_features(screen_data, phone_sensed_bins, day_segment, params):
reference_hour_first_use = params["reference_hour_first_use"] reference_hour_first_use = params["reference_hour_first_use"]
bin_size = params["bin_size"] bin_size = params["bin_size"]
requested_features_deltas = params["requested_features_deltas"] requested_features_deltas = params["features_deltas"]
requested_episode_types = params["requested_episode_types"] requested_episode_types = params["episode_types"]
ignore_episodes_shorter_than = params["ignore_episodes_shorter_than"]
ignore_episodes_longer_than = params["ignore_episodes_longer_than"]
# name of the features this function can compute # name of the features this function can compute
base_features_deltas = ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"] base_features_deltas = ["countepisode", "episodepersensedminutes", "sumduration", "maxduration", "minduration", "avgduration", "stdduration", "firstuseafter"]
@ -65,6 +67,11 @@ def base_screen_features(screen_data, phone_sensed_bins, day_segment, params):
screen_data = splitMultiSegmentEpisodes(screen_data, day_segment, []) screen_data = splitMultiSegmentEpisodes(screen_data, day_segment, [])
screen_data.set_index(["local_start_date"],inplace=True) screen_data.set_index(["local_start_date"],inplace=True)
if ignore_episodes_shorter_than > 0:
screen_data = screen_data.query('@ignore_episodes_shorter_than <= time_diff')
if ignore_episodes_longer_than > 0:
screen_data = screen_data.query('time_diff <= @ignore_episodes_longer_than')
if not screen_data.empty: if not screen_data.empty:
screen_features = pd.DataFrame() screen_features = pd.DataFrame()
for episode in episode_type_to_compute: for episode in episode_type_to_compute:

View File

@ -8,16 +8,10 @@ phone_sensed_bins[phone_sensed_bins > 0] = 1
day_segment = snakemake.params["day_segment"] day_segment = snakemake.params["day_segment"]
screen_features = pd.DataFrame(columns=["local_date"]) screen_features = pd.DataFrame(columns=["local_date"])
params = {} requested_features_deltas = ["firstuseafter" + "{0:0=2d}".format(snakemake.params["reference_hour_first_use"]) if feature_name == "firstuseafter" else feature_name for feature_name in snakemake.params["features_deltas"]]
params["reference_hour_first_use"] = snakemake.params["reference_hour_first_use"] requested_features = ["".join(feature) for feature in itertools.product(requested_features_deltas, snakemake.params["episode_types"])]
params["bin_size"] = snakemake.params["bin_size"]
params["requested_features_deltas"] = snakemake.params["features_deltas"]
params["requested_episode_types"] = snakemake.params["episode_types"]
requested_features_deltas = ["firstuseafter" + "{0:0=2d}".format(params["reference_hour_first_use"]) if feature_name == "firstuseafter" else feature_name for feature_name in params["requested_features_deltas"]] screen_features = screen_features.merge(base_screen_features(screen_data, phone_sensed_bins, day_segment, snakemake.params), on="local_date", how="outer")
requested_features = ["".join(feature) for feature in itertools.product(requested_features_deltas, params["requested_episode_types"])]
screen_features = screen_features.merge(base_screen_features(screen_data, phone_sensed_bins, day_segment, params), on="local_date", how="outer")
assert len(requested_features) + 1 == screen_features.shape[1], "The number of features in the output dataframe (=" + str(screen_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your screen feature extraction functions" assert len(requested_features) + 1 == screen_features.shape[1], "The number of features in the output dataframe (=" + str(screen_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your screen feature extraction functions"