Add phone yield sensor
parent
555811211d
commit
f02ca2624d
53
Snakefile
53
Snakefile
|
@ -12,20 +12,13 @@ files_to_compute = []
|
||||||
if len(config["PIDS"]) == 0:
|
if len(config["PIDS"]) == 0:
|
||||||
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
raise ValueError("Add participants IDs to PIDS in config.yaml. Remember to create their participant files in data/external")
|
||||||
|
|
||||||
if config["PHONE_VALID_SENSED_BINS"]["COMPUTE"] or config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]: # valid sensed bins is necessary for sensed days, so we add these files anyways if sensed days are requested
|
for provider in config["PHONE_DATA_YIELD"]["PROVIDERS"].keys():
|
||||||
if len(config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]) == 0:
|
if config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
raise ValueError("If you want to compute PHONE_VALID_SENSED_BINS or PHONE_VALID_SENSED_DAYS, you need to add at least one PHONE_SENSOR to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml")
|
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"])))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=config["PIDS"], sensor=map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"])))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_data_yield_features/phone_data_yield_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_DATA_YIELD"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_data_yield.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_timestamps.csv", pid=config["PIDS"]))
|
|
||||||
|
|
||||||
if config["PHONE_VALID_SENSED_DAYS"]["COMPUTE"]:
|
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_valid_sensed_days_{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins.csv",
|
|
||||||
pid=config["PIDS"],
|
|
||||||
min_valid_hours_per_day=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_HOURS_PER_DAY"],
|
|
||||||
min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
|
||||||
|
|
||||||
for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
|
for provider in config["PHONE_MESSAGES"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_MESSAGES"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
@ -73,10 +66,10 @@ for provider in config["PHONE_BATTERY"]["PROVIDERS"].keys():
|
||||||
|
|
||||||
for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
|
for provider in config["PHONE_SCREEN"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_SCREEN"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
if "PHONE_SCREEN" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
if "PHONE_SCREEN" in config["PHONE_DATA_YIELD"]["SENSORS"]:
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||||
else:
|
else:
|
||||||
raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
raise ValueError("Error: Add PHONE_SCREEN (and as many phone sensor as you have in your database) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data)")
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_screen_with_datetime_unified.csv", pid=config["PIDS"]))
|
||||||
|
@ -133,10 +126,10 @@ for provider in config["PHONE_CONVERSATION"]["PROVIDERS"].keys():
|
||||||
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
|
if config["PHONE_LOCATIONS"]["LOCATIONS_TO_USE"] == "FUSED_RESAMPLED":
|
||||||
if "PHONE_LOCATIONS" in config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]:
|
if "PHONE_LOCATIONS" in config["PHONE_DATA_YIELD"]["SENSORS"]:
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_sensed_bins.csv", pid=config["PIDS"]))
|
||||||
else:
|
else:
|
||||||
raise ValueError("Error: Add PHONE_LOCATIONS (and as many PHONE_SENSORS as you have) to [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
raise ValueError("Error: Add PHONE_LOCATIONS (and as many SENSORS as you have) to [PHONE_DATA_YIELD][SENSORS] in config.yaml. This is necessary to compute phone_sensed_bins (bins of time when the smartphone was sensing data) which is used to resample fused location data (RESAMPLED_FUSED)")
|
||||||
|
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
|
||||||
|
@ -200,22 +193,22 @@ for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
|
||||||
|
|
||||||
# visualization for data exploration
|
# visualization for data exploration
|
||||||
if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
# if config["HEATMAP_FEATURES_CORRELATIONS"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_features_correlations.html", min_valid_hours_per_day=config["HEATMAP_FEATURES_CORRELATIONS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
|
# if config["HISTOGRAM_VALID_SENSED_HOURS"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/histogram_valid_sensed_hours.html", min_valid_hours_per_day=config["HISTOGRAM_VALID_SENSED_HOURS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
# if config["HEATMAP_DAYS_BY_SENSORS"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
# files_to_compute.extend(expand("reports/interim/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/{pid}/heatmap_days_by_sensors.html", pid=config["PIDS"], min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/heatmap_days_by_sensors_all_participants.html", min_valid_hours_per_day=config["HEATMAP_DAYS_BY_SENSORS"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
# if config["HEATMAP_SENSED_BINS"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
# files_to_compute.extend(expand("reports/interim/heatmap_sensed_bins/{pid}/heatmap_sensed_bins.html", pid=config["PIDS"]))
|
||||||
files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
# files_to_compute.extend(["reports/data_exploration/heatmap_sensed_bins_all_participants.html"])
|
||||||
|
|
||||||
if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
# if config["OVERALL_COMPLIANCE_HEATMAP"]["PLOT"]:
|
||||||
files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
# files_to_compute.extend(expand("reports/data_exploration/{min_valid_hours_per_day}hours_{min_valid_bins_per_hour}bins/overall_compliance_heatmap.html", min_valid_hours_per_day=config["OVERALL_COMPLIANCE_HEATMAP"]["MIN_VALID_HOURS_PER_DAY"], min_valid_bins_per_hour=config["PHONE_VALID_SENSED_DAYS"]["MIN_VALID_BINS_PER_HOUR"]))
|
||||||
|
|
||||||
|
|
||||||
rule all:
|
rule all:
|
||||||
|
|
49
config.yaml
49
config.yaml
|
@ -27,8 +27,8 @@ CREATE_PARTICIPANT_FILES:
|
||||||
|
|
||||||
# See https://www.rapids.science/setup/configuration/#day-segments
|
# See https://www.rapids.science/setup/configuration/#day-segments
|
||||||
DAY_SEGMENTS: &day_segments
|
DAY_SEGMENTS: &day_segments
|
||||||
TYPE: EVENT # FREQUENCY, PERIODIC, EVENT
|
TYPE: PERIODIC # FREQUENCY, PERIODIC, EVENT
|
||||||
FILE: "data/external/daysegments_event.csv"
|
FILE: "data/external/daysegments_periodic.csv"
|
||||||
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
|
INCLUDE_PAST_PERIODIC_SEGMENTS: FALSE # Only relevant if TYPE=PERIODIC, see docs
|
||||||
|
|
||||||
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
|
# See https://www.rapids.science/setup/configuration/#device-data-source-configuration
|
||||||
|
@ -54,27 +54,22 @@ DEVICE_DATA:
|
||||||
############## PHONE ###########################################################
|
############## PHONE ###########################################################
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
PHONE_VALID_SENSED_BINS:
|
PHONE_DATA_YIELD:
|
||||||
COMPUTE: False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features
|
SENSORS: [PHONE_MESSAGES, PHONE_CALLS, PHONE_ACCELEROMETER]
|
||||||
BIN_SIZE: &bin_size 5 # (in minutes)
|
PROVIDERS:
|
||||||
# Add as many PHONE sensors as you have, they all improve the computation of PHONE_VALID_SENSED_BINS and PHONE_VALID_SENSED_DAYS.
|
RAPIDS:
|
||||||
# If you are extracting screen or Barnett/Doryab location features, PHONE_SCREEN and PHONE_LOCATIONS tables are mandatory.
|
COMPUTE: True
|
||||||
# You can choose any of the keys shown below, just make sure its TABLE exists in your database!
|
FEATURES: [ratiovalidyieldedminutes, ratiovalidyieldedhours]
|
||||||
# PHONE_MESSAGES, PHONE_CALLS, PHONE_LOCATIONS, PHONE_BLUETOOTH, PHONE_ACTIVITY_RECOGNITION, PHONE_BATTERY, PHONE_SCREEN, PHONE_LIGHT,
|
MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS: 0.5 # 0 to 1; an hour counts as valid when the fraction of its minutes with at least one row of sensor data is greater than this value
|
||||||
# PHONE_ACCELEROMETER, PHONE_APPLICATIONS_FOREGROUND, PHONE_WIFI_VISIBLE, PHONE_WIFI_CONNECTED, PHONE_CONVERSATION
|
SRC_LANGUAGE: "r"
|
||||||
PHONE_SENSORS: []
|
SRC_FOLDER: "rapids" # inside src/features/phone_data_yield
|
||||||
|
|
||||||
PHONE_VALID_SENSED_DAYS:
|
|
||||||
COMPUTE: False
|
|
||||||
MIN_VALID_BINS_PER_HOUR: &min_valid_bins_per_hour [6] # (out of 60min/BIN_SIZE bins)
|
|
||||||
MIN_VALID_HOURS_PER_DAY: &min_valid_hours_per_day [16] # (out of 24) MIN_HOURS_PER_DAY
|
|
||||||
|
|
||||||
# Communication SMS features config, TYPES and FEATURES keys need to match
|
# Communication SMS features config, TYPES and FEATURES keys need to match
|
||||||
PHONE_MESSAGES:
|
PHONE_MESSAGES:
|
||||||
TABLE: messages
|
TABLE: messages
|
||||||
PROVIDERS:
|
PROVIDERS:
|
||||||
RAPIDS:
|
RAPIDS:
|
||||||
COMPUTE: True
|
COMPUTE: False
|
||||||
MESSAGES_TYPES : [received, sent]
|
MESSAGES_TYPES : [received, sent]
|
||||||
FEATURES:
|
FEATURES:
|
||||||
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
received: [count, distinctcontacts, timefirstmessage, timelastmessage, countmostfrequentcontact]
|
||||||
|
@ -329,8 +324,8 @@ FITBIT_CALORIES:
|
||||||
HEATMAP_FEATURES_CORRELATIONS:
|
HEATMAP_FEATURES_CORRELATIONS:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
MIN_ROWS_RATIO: 0.5
|
MIN_ROWS_RATIO: 0.5
|
||||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||||
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
PHONE_FEATURES: [accelerometer, activity_recognition, applications_foreground, battery, calls_incoming, calls_missed, calls_outgoing, conversation, light, location_doryab, messages_received, messages_sent, screen]
|
||||||
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
FITBIT_FEATURES: [fitbit_heartrate, fitbit_step, fitbit_sleep]
|
||||||
CORR_THRESHOLD: 0.1
|
CORR_THRESHOLD: 0.1
|
||||||
|
@ -338,25 +333,25 @@ HEATMAP_FEATURES_CORRELATIONS:
|
||||||
|
|
||||||
HISTOGRAM_VALID_SENSED_HOURS:
|
HISTOGRAM_VALID_SENSED_HOURS:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||||
|
|
||||||
HEATMAP_DAYS_BY_SENSORS:
|
HEATMAP_DAYS_BY_SENSORS:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||||
EXPECTED_NUM_OF_DAYS: -1
|
EXPECTED_NUM_OF_DAYS: -1
|
||||||
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
|
DB_TABLES: [accelerometer, applications_foreground, battery, bluetooth, calls, light, locations, messages, screen, wifi, sensor_wifi, plugin_google_activity_recognition, plugin_ios_activity_recognition, plugin_studentlife_audio_android, plugin_studentlife_audio]
|
||||||
|
|
||||||
HEATMAP_SENSED_BINS:
|
HEATMAP_SENSED_BINS:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
BIN_SIZE: *bin_size
|
BIN_SIZE: #*bin_size
|
||||||
|
|
||||||
OVERALL_COMPLIANCE_HEATMAP:
|
OVERALL_COMPLIANCE_HEATMAP:
|
||||||
PLOT: False
|
PLOT: False
|
||||||
ONLY_SHOW_VALID_DAYS: False
|
ONLY_SHOW_VALID_DAYS: False
|
||||||
EXPECTED_NUM_OF_DAYS: -1
|
EXPECTED_NUM_OF_DAYS: -1
|
||||||
BIN_SIZE: *bin_size
|
BIN_SIZE: #*bin_size
|
||||||
MIN_VALID_HOURS_PER_DAY: *min_valid_hours_per_day
|
MIN_VALID_HOURS_PER_DAY: #*min_valid_hours_per_day
|
||||||
MIN_VALID_BINS_PER_HOUR: *min_valid_bins_per_hour
|
MIN_VALID_BINS_PER_HOUR: #*min_valid_bins_per_hour
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,32 @@ rule join_features_from_providers:
|
||||||
script:
|
script:
|
||||||
"../src/features/join_features_from_providers.R"
|
"../src/features/join_features_from_providers.R"
|
||||||
|
|
||||||
|
rule phone_data_yield_python_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
|
||||||
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["PHONE_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "phone_data_yield"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_data_yield_features/phone_data_yield_python_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
rule phone_data_yield_r_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv",
|
||||||
|
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["PHONE_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "phone_data_yield"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_data_yield_features/phone_data_yield_r_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.R"
|
||||||
|
|
||||||
rule phone_accelerometer_python_features:
|
rule phone_accelerometer_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/phone_accelerometer_with_datetime.csv",
|
||||||
|
|
|
@ -77,23 +77,27 @@ rule phone_readable_datetime:
|
||||||
script:
|
script:
|
||||||
"../src/data/readable_datetime.R"
|
"../src/data/readable_datetime.R"
|
||||||
|
|
||||||
rule phone_sensed_bins:
|
rule phone_yielded_timestamps:
|
||||||
input:
|
input:
|
||||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_DATA_YIELD"]["SENSORS"]))
|
||||||
params:
|
|
||||||
bin_size = config["PHONE_VALID_SENSED_BINS"]["BIN_SIZE"]
|
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/phone_sensed_bins.csv"
|
"data/interim/{pid}/phone_yielded_timestamps.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/phone_sensed_bins.R"
|
"../src/data/phone_yielded_timestamps.R"
|
||||||
|
|
||||||
rule phone_sensed_timestamps:
|
rule phone_yielded_timestamps_with_datetime:
|
||||||
input:
|
input:
|
||||||
all_sensors = expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor = map(str.lower, config["PHONE_VALID_SENSED_BINS"]["PHONE_SENSORS"]))
|
sensor_input = "data/interim/{pid}/phone_yielded_timestamps.csv",
|
||||||
|
day_segments = "data/interim/day_segments/{pid}_day_segments.csv"
|
||||||
|
params:
|
||||||
|
timezones = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["TYPE"],
|
||||||
|
fixed_timezone = config["DEVICE_DATA"]["PHONE"]["TIMEZONE"]["VALUE"],
|
||||||
|
day_segments_type = config["DAY_SEGMENTS"]["TYPE"],
|
||||||
|
include_past_periodic_segments = config["DAY_SEGMENTS"]["INCLUDE_PAST_PERIODIC_SEGMENTS"]
|
||||||
output:
|
output:
|
||||||
"data/interim/{pid}/phone_sensed_timestamps.csv"
|
"data/interim/{pid}/phone_yielded_timestamps_with_datetime.csv"
|
||||||
script:
|
script:
|
||||||
"../src/data/phone_sensed_timestamps.R"
|
"../src/data/readable_datetime.R"
|
||||||
|
|
||||||
rule phone_valid_sensed_days:
|
rule phone_valid_sensed_days:
|
||||||
input:
|
input:
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
library("dplyr", warn.conflicts = F)
|
||||||
|
library(tidyr)
|
||||||
|
library(readr)
|
||||||
|
|
||||||
|
#' Compute phone data yield ratios for one day segment
#'
#' For every `local_segment` instance this counts the distinct minutes that
#' contain at least one row, flags each hour as valid when enough of its
#' minutes have data, and turns both counts into ratios over the segment
#' duration.
#'
#' @param data Data frame with a numeric `timestamp` column. The arithmetic
#'   below treats timestamps as milliseconds. It is first passed through
#'   `filter_data_by_segment()`; that helper is presumably what provides the
#'   `local_segment` and `timestamps_segment` columns used here -- TODO confirm.
#' @param feature_name Not used by this function; kept in the signature so
#'   existing callers keep working.
#' @param day_segment Day segment forwarded to `filter_data_by_segment()`.
#' @param provider Provider configuration list. Only
#'   `MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS` is read.
#' @return One row per `local_segment` with `valid_yielded_minutes`,
#'   `valid_yielded_hours`, `duration_minutes`, `duration_hours`,
#'   `phone_data_yield_rapids_ratiovalidyieldedminutes` and
#'   `phone_data_yield_rapids_ratiovalidyieldedhours`.
compute_data_yield_features <- function(data, feature_name, day_segment, provider){
    ms_per_minute <- 60000 # 60 * 1000
    ms_per_hour <- 3600000 # 60 * 60 * 1000

    # Read the threshold once, with exact name matching, so the lookup inside
    # summarise() cannot be shadowed by a data column or partially matched.
    valid_hour_threshold <- provider[["MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS"]]

    data <- data %>% filter_data_by_segment(day_segment)

    features <- data %>%
        # timestamps_segment holds "start,end"; convert = TRUE turns both into numbers
        separate(timestamps_segment, into = c("start_timestamp", "end_timestamp"), convert = TRUE, sep = ",") %>%
        mutate(duration_minutes = (end_timestamp - start_timestamp) / ms_per_minute,
               timestamp_since_segment_start = timestamp - start_timestamp,
               minute_bin = timestamp_since_segment_start %/% ms_per_minute,
               hour_bin = timestamp_since_segment_start %/% ms_per_hour) %>%
        group_by(local_segment, hour_bin) %>%
        summarise(minute_count = n_distinct(minute_bin),
                  duration_minutes = first(duration_minutes),
                  # segments shorter than an hour are judged against their own length
                  valid_hour = (minute_count / min(duration_minutes, 60)) > valid_hour_threshold) %>%
        group_by(local_segment) %>%
        summarise(valid_yielded_minutes = sum(minute_count),
                  # "/ 1.0" makes this a double so both if_else() branches below share a type
                  valid_yielded_hours = sum(valid_hour == TRUE) / 1.0,
                  duration_minutes = first(duration_minutes),
                  duration_hours = duration_minutes / 60.0,
                  phone_data_yield_rapids_ratiovalidyieldedminutes = valid_yielded_minutes / duration_minutes,
                  # for segments of at most one hour the ratio is the 0/1 valid-hour count itself
                  phone_data_yield_rapids_ratiovalidyieldedhours = if_else(duration_hours > 1, valid_yielded_hours / duration_hours, valid_yielded_hours))

    return(features)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#' RAPIDS provider entry point for phone data yield features
#'
#' Reads the yielded-timestamps file, works out which of the supported
#' features were requested, and returns those feature columns for the given
#' day segment.
#'
#' @param sensor_data_files List-like object; its `sensor_data` element is the
#'   path of the CSV to read. Only the `timestamp` (double) and
#'   `assigned_segments` (character) columns are loaded.
#' @param day_segment Day segment forwarded to `compute_data_yield_features()`.
#' @param provider Provider configuration list; `FEATURES` holds the requested
#'   feature names, and the whole list is forwarded to
#'   `compute_data_yield_features()`.
#' @return Data frame with `local_segment` plus one
#'   `phone_data_yield_rapids_<feature>` column per requested, supported feature.
rapids_features <- function(sensor_data_files, day_segment, provider){

    yield_data <- read_csv(sensor_data_files[["sensor_data"]], col_types = cols_only(timestamp ="d", assigned_segments = "c"))
    requested_features <- provider[["FEATURES"]]

    # The name of the features this function can compute
    base_features_names <- c("ratiovalidyieldedminutes", "ratiovalidyieldedhours")

    # The subset of requested features this function can compute
    features_to_compute <- intersect(base_features_names, requested_features)

    # Pass a value that exists in this scope: the previous argument
    # (`feature_name`) was never defined here and only worked because the
    # callee never evaluates its second parameter.
    features <- compute_data_yield_features(yield_data, features_to_compute, day_segment, provider) %>%
        select(c("local_segment", paste0("phone_data_yield_rapids_", features_to_compute)))

    return(features)
}
|
Loading…
Reference in New Issue