Update AR module for segments; Refactor input format
parent
01ab59a3b6
commit
236b1cd809
26
Snakefile
26
Snakefile
|
@ -55,16 +55,22 @@ for provider in config["BLUETOOTH"]["PROVIDERS"].keys():
|
|||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["BLUETOOTH"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="BLUETOOTH".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="BLUETOOTH".lower()))
|
||||
|
||||
if config["ACTIVITY_RECOGNITION"]["COMPUTE"]:
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
|
||||
|
||||
for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/{sensor}_deltas.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/processed/{pid}/activity_recognition_{day_segment}.csv",pid=config["PIDS"], day_segment = config["ACTIVITY_RECOGNITION"]["DAY_SEGMENTS"]))
|
||||
for provider in config["ACTIVITY_RECOGNITION"]["PROVIDERS"].keys():
|
||||
if config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
pids_android = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "android", config["PIDS"]))
|
||||
pids_ios = list(filter(lambda pid: infer_participant_platform("data/external/" + pid) == "ios", config["PIDS"]))
|
||||
|
||||
for pids,table in zip([pids_android, pids_ios], [config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"], config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]]):
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_raw.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime.csv", pid=pids, sensor=table))
|
||||
files_to_compute.extend(expand("data/raw/{pid}/{sensor}_with_datetime_unified.csv", pid=pids, sensor=table))
|
||||
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
|
||||
files_to_compute.extend(expand("data/interim/{pid}/{sensor_key}_features/{sensor_key}_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["ACTIVITY_RECOGNITION"]["PROVIDERS"][provider]["SRC_LANGUAGE"], provider_key=provider, sensor_key="ACTIVITY_RECOGNITION".lower()))
|
||||
files_to_compute.extend(expand("data/processed/features/{pid}/{sensor_key}.csv", pid=config["PIDS"], sensor_key="ACTIVITY_RECOGNITION".lower()))
|
||||
|
||||
|
||||
for provider in config["BATTERY"]["PROVIDERS"].keys():
|
||||
if config["BATTERY"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||
|
|
13
config.yaml
13
config.yaml
|
@ -113,12 +113,19 @@ BLUETOOTH:
|
|||
|
||||
|
||||
ACTIVITY_RECOGNITION:
|
||||
COMPUTE: False
|
||||
DB_TABLE:
|
||||
ANDROID: plugin_google_activity_recognition
|
||||
IOS: plugin_ios_activity_recognition
|
||||
DAY_SEGMENTS: *day_segments
|
||||
FEATURES: ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
|
||||
PROVIDERS:
|
||||
RAPIDS:
|
||||
COMPUTE: False
|
||||
FEATURES: ["count", "mostcommonactivity", "countuniqueactivities", "durationstationary", "durationmobile", "durationvehicle"]
|
||||
ACTIVITY_CLASSES:
|
||||
STATIONARY: ["still", "tilting"]
|
||||
MOBILE: ["on_foot", "walking", "running", "on_bicycle"]
|
||||
VEHICLE: ["in_vehicle"]
|
||||
SRC_FOLDER: "rapids" # inside src/features/activity_recognition
|
||||
SRC_LANGUAGE: "python"
|
||||
|
||||
BATTERY:
|
||||
DB_TABLE: battery
|
||||
|
|
|
@ -46,21 +46,19 @@ def find_features_files(wildcards):
|
|||
|
||||
def optional_ar_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
|
||||
if platform == "android":
|
||||
return ["data/raw/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv",
|
||||
"data/interim/{pid}/" + config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"] + "_episodes.csv"]
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
|
||||
elif platform == "ios":
|
||||
return ["data/raw/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_with_datetime_unified.csv",
|
||||
"data/interim/{pid}/"+config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"]+"_episodes.csv"]
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
|
||||
|
||||
def optional_conversation_input(wildcards):
|
||||
platform = infer_participant_platform("data/external/"+wildcards.pid)
|
||||
|
||||
if platform == "android":
|
||||
return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["ANDROID"] + "_with_datetime_unified.csv"]
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["ANDROID"])[0]
|
||||
elif platform == "ios":
|
||||
return ["data/raw/{pid}/" + config["CONVERSATION"]["DB_TABLE"]["IOS"] + "_with_datetime_unified.csv"]
|
||||
return expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CONVERSATION"]["DB_TABLE"]["IOS"])[0]
|
||||
|
||||
def optional_steps_sleep_input(wildcards):
|
||||
if config["STEP"]["EXCLUDE_SLEEP"]["EXCLUDE"] == True and config["STEP"]["EXCLUDE_SLEEP"]["TYPE"] == "FITBIT_BASED":
|
||||
|
|
|
@ -6,70 +6,6 @@ rule join_features_from_providers:
|
|||
script:
|
||||
"../src/features/join_features_from_providers.R"
|
||||
|
||||
rule messages_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/messages/messages_entry.R"
|
||||
|
||||
rule messages_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/messages/messages_entry.py"
|
||||
|
||||
rule calls_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/calls/calls_entry.py"
|
||||
|
||||
rule calls_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/calls/calls_entry.R"
|
||||
|
||||
rule battery_episodes:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
|
||||
output:
|
||||
"data/interim/{pid}/battery_episodes.csv"
|
||||
script:
|
||||
"../src/features/battery/episodes/battery_episodes.R"
|
||||
|
||||
rule screen_episodes:
|
||||
input:
|
||||
screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
|
||||
output:
|
||||
"data/interim/{pid}/screen_episodes.csv"
|
||||
script:
|
||||
"../src/features/screen/episodes/screen_episodes.R"
|
||||
|
||||
rule resample_episodes:
|
||||
input:
|
||||
"data/interim/{pid}/{sensor}_episodes.csv"
|
||||
|
@ -92,178 +28,6 @@ rule resample_episodes_with_datetime:
|
|||
script:
|
||||
"../src/data/readable_datetime.R"
|
||||
|
||||
|
||||
rule google_activity_recognition_deltas:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
|
||||
output:
|
||||
expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["ANDROID"])
|
||||
script:
|
||||
"../src/features/ar/episodes/activity_recognition_episodes.R"
|
||||
|
||||
rule ios_activity_recognition_deltas:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
|
||||
output:
|
||||
expand("data/interim/{{pid}}/{sensor}_episodes.csv", sensor=config["ACTIVITY_RECOGNITION"]["DB_TABLE"]["IOS"])
|
||||
script:
|
||||
"../src/features/ar/episodes/activity_recognition_episodes.R"
|
||||
|
||||
rule locations_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
output:
|
||||
"data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/locations/locations_entry.py"
|
||||
|
||||
rule locations_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/locations/locations_entry.R"
|
||||
|
||||
rule bluetooth_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/bluetooth/bluetooth_entry.R"
|
||||
|
||||
rule bluetooth_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/bluetooth/bluetooth_entry.py"
|
||||
|
||||
rule activity_features:
|
||||
input:
|
||||
optional_ar_input
|
||||
params:
|
||||
segment = "{day_segment}",
|
||||
features = config["ACTIVITY_RECOGNITION"]["FEATURES"]
|
||||
output:
|
||||
"data/processed/{pid}/activity_recognition_{day_segment}.csv"
|
||||
script:
|
||||
"../src/features/activity_recognition.py"
|
||||
|
||||
rule battery_r_features:
|
||||
input:
|
||||
battery_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/battery/battery_entry.R"
|
||||
|
||||
rule battery_python_features:
|
||||
input:
|
||||
battery_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/battery/battery_entry.py"
|
||||
|
||||
rule screen_r_features:
|
||||
input:
|
||||
screen_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/screen/screen_entry.R"
|
||||
|
||||
rule screen_python_features:
|
||||
input:
|
||||
screen_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/screen/screen_entry.py"
|
||||
|
||||
rule light_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/light/light_entry.R"
|
||||
|
||||
rule light_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"]),
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/light/light_entry.py"
|
||||
|
||||
rule conversation_r_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/conversation/conversation_entry.R"
|
||||
|
||||
rule conversation_python_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/conversation/conversation_entry.py"
|
||||
|
||||
rule accelerometer_features:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["ACCELEROMETER"]["DB_TABLE"]),
|
||||
|
@ -278,53 +42,315 @@ rule accelerometer_features:
|
|||
script:
|
||||
"../src/features/accelerometer_features.py"
|
||||
|
||||
rule activity_recognition_episodes:
|
||||
input:
|
||||
optional_ar_input
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_episodes.csv"
|
||||
script:
|
||||
"../src/features/activity_recognition/episodes/activity_recognition_episodes.R"
|
||||
|
||||
rule activity_recognition_r_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "activity_recognition"
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_features/activity_recognition_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule activity_recognition_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/activity_recognition_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["ACTIVITY_RECOGNITION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "activity_recognition"
|
||||
output:
|
||||
"data/interim/{pid}/activity_recognition_features/activity_recognition_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule applications_foreground_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]),
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "applications_foreground"
|
||||
output:
|
||||
"data/interim/{pid}/applications_foreground_features/applications_foreground_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/applications_foreground/applications_foreground_entry.R"
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule applications_foreground_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"]),
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_with_genre.csv", sensor=config["APPLICATIONS_FOREGROUND"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["APPLICATIONS_FOREGROUND"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "applications_foreground"
|
||||
output:
|
||||
"data/interim/{pid}/applications_foreground_features/applications_foreground_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/applications_foreground/applications_foreground_entry.py"
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule battery_episodes:
|
||||
input:
|
||||
expand("data/raw/{{pid}}/{sensor}_raw.csv", sensor=config["BATTERY"]["DB_TABLE"])
|
||||
output:
|
||||
"data/interim/{pid}/battery_episodes.csv"
|
||||
script:
|
||||
"../src/features/battery/episodes/battery_episodes.R"
|
||||
|
||||
rule battery_r_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "battery"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule battery_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/battery_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BATTERY"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "battery"
|
||||
output:
|
||||
"data/interim/{pid}/battery_features/battery_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule bluetooth_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "bluetooth"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule bluetooth_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["BLUETOOTH"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["BLUETOOTH"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "bluetooth"
|
||||
output:
|
||||
"data/interim/{pid}/bluetooth_features/bluetooth_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule calls_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "calls"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule calls_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["CALLS"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CALLS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "calls"
|
||||
output:
|
||||
"data/interim/{pid}/calls_features/calls_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule conversation_r_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "conversation"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule conversation_python_features:
|
||||
input:
|
||||
sensor_data = optional_conversation_input,
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["CONVERSATION"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "conversation"
|
||||
output:
|
||||
"data/interim/{pid}/conversation_features/conversation_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule light_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "light"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule light_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["LIGHT"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LIGHT"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "light"
|
||||
output:
|
||||
"data/interim/{pid}/light_features/light_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule locations_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "locations"
|
||||
output:
|
||||
"data/interim/{pid}/locations_features/locations_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule locations_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/interim/{{pid}}/{sensor}_processed_{locations_to_use}_with_datetime.csv", sensor=config["LOCATIONS"]["DB_TABLE"], locations_to_use=config["LOCATIONS"]["LOCATIONS_TO_USE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["LOCATIONS"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "locations"
|
||||
output:
|
||||
"data/interim/{pid}/locations_features/locations_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule messages_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "messages"
|
||||
output:
|
||||
"data/interim/{pid}/messages_features/messages_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule messages_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor}_with_datetime.csv", sensor=config["MESSAGES"]["DB_TABLE"])[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["MESSAGES"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "messages"
|
||||
output:
|
||||
"data/interim/{pid}/messages_features/messages_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule screen_episodes:
|
||||
input:
|
||||
screen = expand("data/raw/{{pid}}/{sensor}_with_datetime_unified.csv", sensor=config["SCREEN"]["DB_TABLE"])
|
||||
output:
|
||||
"data/interim/{pid}/screen_episodes.csv"
|
||||
script:
|
||||
"../src/features/screen/episodes/screen_episodes.R"
|
||||
|
||||
rule screen_r_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "screen"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule screen_python_features:
|
||||
input:
|
||||
sensor_episodes = "data/interim/{pid}/screen_episodes_resampled_with_datetime.csv",
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["SCREEN"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "screen"
|
||||
output:
|
||||
"data/interim/{pid}/screen_features/screen_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule wifi_r_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "wifi"
|
||||
output:
|
||||
"data/interim/{pid}/wifi_features/wifi_r_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/wifi/wifi_entry.R"
|
||||
"../src/features/entry.R"
|
||||
|
||||
rule wifi_python_features:
|
||||
input:
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower()),
|
||||
sensor_data = expand("data/raw/{{pid}}/{sensor_key}_with_datetime_visibleandconnected.csv", sensor_key="WIFI".lower())[0],
|
||||
day_segments_labels = "data/interim/day_segments/{pid}_day_segments_labels.csv"
|
||||
params:
|
||||
provider = lambda wildcards: config["WIFI"]["PROVIDERS"][wildcards.provider_key],
|
||||
provider_key = "{provider_key}"
|
||||
provider_key = "{provider_key}",
|
||||
sensor_key = "wifi"
|
||||
output:
|
||||
"data/interim/{pid}/wifi_features/wifi_python_{provider_key}.csv"
|
||||
script:
|
||||
"../src/features/wifi/wifi_entry.py"
|
||||
"../src/features/entry.py"
|
||||
|
||||
rule fitbit_heartrate_features:
|
||||
input:
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
import pandas as pd
|
||||
from ar.ar_base import base_ar_features
|
||||
|
||||
ar_data = pd.read_csv(snakemake.input[0],parse_dates=["local_date_time"])
|
||||
ar_deltas = pd.read_csv(snakemake.input[1],parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
|
||||
day_segment = snakemake.params["segment"]
|
||||
requested_features = snakemake.params["features"]
|
||||
ar_features = pd.DataFrame(columns=["local_date"])
|
||||
|
||||
|
||||
ar_features = ar_features.merge(base_ar_features(ar_data, ar_deltas, day_segment, requested_features), on="local_date", how="outer")
|
||||
|
||||
assert len(requested_features) + 1 == ar_features.shape[1], "The number of features in the output dataframe (=" + str(ar_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your activity recognition feature extraction functions"
|
||||
|
||||
ar_features.to_csv(snakemake.output[0], index=False)
|
|
@ -0,0 +1,123 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Compute activity-recognition features for every instance of a day segment.

    Parameters
    ----------
    sensor_data_files : mapping
        Must contain the key "sensor_episodes": a path (or file-like object)
        readable by ``pd.read_csv`` holding the activity episodes.
    day_segment : object
        Segment identifier forwarded untouched to ``filter_data_by_segment``.
    provider : mapping
        Provider configuration; reads "FEATURES" (requested feature names) and
        "ACTIVITY_CLASSES" (class name -> list of activity names).
    filter_data_by_segment : callable
        ``f(data, day_segment)`` returning the rows that belong to the segment;
        the returned frame is grouped by its "local_segment" column.
    **kwargs
        Must contain "chunk_episodes", a callable applied to the filtered
        episodes before features are computed.

    Returns
    -------
    pandas.DataFrame
        One row per "local_segment" with one "ar_rapids_<feature>" column per
        computed feature. When there is no data, an empty frame with the same
        column names is returned.
    """
    chunk_episodes = kwargs["chunk_episodes"]

    ar_episodes = pd.read_csv(sensor_data_files["sensor_episodes"])
    activity_classes = provider["ACTIVITY_CLASSES"]

    # name of the features this function can compute
    base_features_names = ["count", "mostcommonactivity", "countuniqueactivities",
                           "durationstationary", "durationmobile", "durationvehicle"]
    # the subset of requested features this function can compute; iterating the
    # base list (instead of intersecting sets) keeps the column order stable
    requested_features = provider["FEATURES"]
    features_to_compute = [name for name in base_features_names if name in requested_features]

    empty_features = pd.DataFrame(columns=["local_segment"] + ["ar_rapids_" + x for x in features_to_compute])

    if ar_episodes.empty:
        return empty_features
    ar_episodes = filter_data_by_segment(ar_episodes, day_segment)
    if ar_episodes.empty:
        return empty_features
    ar_episodes = chunk_episodes(ar_episodes)
    if ar_episodes.empty:
        return empty_features

    episodes_by_segment = ar_episodes.groupby(["local_segment"])

    # Anchor the output on every segment that has episodes. Without this the
    # index would be dictated by whichever feature is assigned first, and
    # segments missing from that feature would be dropped from all others.
    ar_features = pd.DataFrame(index=episodes_by_segment.size().index)

    if "count" in features_to_compute:
        ar_features["ar_rapids_count"] = episodes_by_segment["episode_id"].count()
    if "mostcommonactivity" in features_to_compute:
        ar_features["ar_rapids_mostcommonactivity"] = episodes_by_segment["activity_type"].agg(lambda x: pd.Series.mode(x)[0])
    if "countuniqueactivities" in features_to_compute:
        ar_features["ar_rapids_countuniqueactivities"] = episodes_by_segment["activity_type"].nunique()

    # duration features
    for column, activity_labels in activity_classes.items():
        feature_name = "duration" + column.lower()
        if feature_name not in features_to_compute:
            continue
        class_episodes = ar_episodes[ar_episodes["activity_name"].isin(pd.Series(activity_labels))]
        class_duration = class_episodes.groupby(["local_segment"])["duration"].sum()
        # Same column name as in the empty frame above, and an explicit 0 for
        # segments that have no episode of this activity class.
        ar_features["ar_rapids_" + feature_name] = class_duration.reindex(ar_features.index, fill_value=0)

    ar_features.index.names = ["local_segment"]
    return ar_features.reset_index()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
|
||||
if not ar_data.empty:
|
||||
ar_data = filter_data_by_segment(ar_data, day_segment)
|
||||
|
||||
if not ar_data.empty:
|
||||
# chunk_episodes
|
||||
ar_data = chunk_episodes(ar_data)
|
||||
|
||||
if not ar_data.empty:
|
||||
|
||||
ar_data["episode_id"] = ((ar_data.ar_status != ar_data.ar_status.shift()) | (ar_data.start_timestamp - ar_data.end_timestamp.shift() > 1)).cumsum()
|
||||
grouped = ar_data.groupby(by=["local_segment", "episode_id", "ar_status"])
|
||||
ar_episodes= grouped[["duration"]].sum()
|
||||
ar_episodes["ar_diff"] = grouped["ar_level"].first() - grouped["ar_level"].last()
|
||||
ar_episodes["ar_consumption_rate"] = ar_episodes["ar_diff"] / ar_episodes["duration"]
|
||||
ar_episodes.reset_index(inplace=True)
|
||||
|
||||
# for discharge episodes
|
||||
ar_discharge_episodes = ar_episodes[(ar_episodes["ar_status"] == 3) | (ar_episodes["ar_status"] == 4)]
|
||||
ar_discharge_features = pd.DataFrame()
|
||||
if "countdischarge" in features_to_compute:
|
||||
ar_discharge_features["ar_rapids_countdischarge"] = ar_discharge_episodes.groupby(["local_segment"])["episode_id"].count()
|
||||
if "sumdurationdischarge" in features_to_compute:
|
||||
ar_discharge_features["ar_rapids_sumdurationdischarge"] = ar_discharge_episodes.groupby(["local_segment"])["duration"].sum()
|
||||
if "avgconsumptionrate" in features_to_compute:
|
||||
ar_discharge_features["ar_rapids_avgconsumptionrate"] = ar_discharge_episodes.groupby(["local_segment"])["ar_consumption_rate"].mean()
|
||||
if "maxconsumptionrate" in features_to_compute:
|
||||
ar_discharge_features["ar_rapids_maxconsumptionrate"] = ar_discharge_episodes.groupby(["local_segment"])["ar_consumption_rate"].max()
|
||||
|
||||
# for charge episodes
|
||||
ar_charge_episodes = ar_episodes[(ar_episodes["ar_status"] == 2) | (ar_episodes["ar_status"] == 5)]
|
||||
ar_charge_features = pd.DataFrame()
|
||||
if "countcharge" in features_to_compute:
|
||||
ar_charge_features["ar_rapids_countcharge"] = ar_charge_episodes.groupby(["local_segment"])["episode_id"].count()
|
||||
if "sumdurationcharge" in features_to_compute:
|
||||
ar_charge_features["ar_rapids_sumdurationcharge"] = ar_charge_episodes.groupby(["local_segment"])["duration"].sum()
|
||||
|
||||
# combine discharge features and charge features; fill the missing values with ZERO
|
||||
ar_features = pd.concat([ar_discharge_features, ar_charge_features], axis=1, sort=True).fillna(0)
|
||||
|
||||
ar_features.index.rename("local_segment", inplace=True)
|
||||
ar_features = ar_features.reset_index()
|
||||
|
||||
return ar_features
|
||||
"""
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "applications_foreground", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "applications_foreground", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -9,28 +9,31 @@ def compute_features(filtered_data, apps_type, requested_features, apps_features
|
|||
if "timeoffirstuse" in requested_features:
|
||||
time_first_event = filtered_data.sort_values(by="timestamp", ascending=True).drop_duplicates(subset="local_segment", keep="first").set_index("local_segment")
|
||||
if time_first_event.empty:
|
||||
apps_features["apps_rapids" + "_timeoffirstuse" + apps_type] = np.nan
|
||||
apps_features["apps_rapids_timeoffirstuse" + apps_type] = np.nan
|
||||
else:
|
||||
apps_features["apps_rapids" + "_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
||||
apps_features["apps_rapids_timeoffirstuse" + apps_type] = time_first_event["local_hour"] * 60 + time_first_event["local_minute"]
|
||||
if "timeoflastuse" in requested_features:
|
||||
time_last_event = filtered_data.sort_values(by="timestamp", ascending=False).drop_duplicates(subset="local_segment", keep="first").set_index("local_segment")
|
||||
if time_last_event.empty:
|
||||
apps_features["apps_rapids" + "_timeoflastuse" + apps_type] = np.nan
|
||||
apps_features["apps_rapids_timeoflastuse" + apps_type] = np.nan
|
||||
else:
|
||||
apps_features["apps_rapids" + "_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
||||
apps_features["apps_rapids_timeoflastuse" + apps_type] = time_last_event["local_hour"] * 60 + time_last_event["local_minute"]
|
||||
if "frequencyentropy" in requested_features:
|
||||
apps_with_count = filtered_data.groupby(["local_segment","application_name"]).count().sort_values(by="timestamp", ascending=False).reset_index()
|
||||
if (len(apps_with_count.index) < 2 ):
|
||||
apps_features["apps_rapids" + "_frequencyentropy" + apps_type] = np.nan
|
||||
apps_features["apps_rapids_frequencyentropy" + apps_type] = np.nan
|
||||
else:
|
||||
apps_features["apps_rapids" + "_frequencyentropy" + apps_type] = apps_with_count.groupby("local_segment")["timestamp"].agg(entropy)
|
||||
apps_features["apps_rapids_frequencyentropy" + apps_type] = apps_with_count.groupby("local_segment")["timestamp"].agg(entropy)
|
||||
if "count" in requested_features:
|
||||
apps_features["apps_rapids" + "_count" + apps_type] = filtered_data.groupby(["local_segment"]).count()["timestamp"]
|
||||
apps_features.fillna(value={"apps_rapids" + "_count" + apps_type: 0}, inplace=True)
|
||||
apps_features["apps_rapids_count" + apps_type] = filtered_data.groupby(["local_segment"]).count()["timestamp"]
|
||||
apps_features.fillna(value={"apps_rapids_count" + apps_type: 0}, inplace=True)
|
||||
return apps_features
|
||||
|
||||
|
||||
def rapids_features(apps_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
apps_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||
|
||||
requested_features = provider["FEATURES"]
|
||||
excluded_categories = provider["EXCLUDED_CATEGORIES"]
|
||||
excluded_apps = provider["EXCLUDED_APPS"]
|
||||
|
@ -49,10 +52,8 @@ def rapids_features(apps_data, day_segment, provider, filter_data_by_segment, *a
|
|||
apps_data = apps_data[~apps_data["genre"].isin(excluded_categories)]
|
||||
# exclude apps in the excluded_apps list
|
||||
apps_data = apps_data[~apps_data["package_name"].isin(excluded_apps)]
|
||||
|
||||
|
||||
|
||||
apps_features = pd.DataFrame(columns=["local_segment"] + ["apps_rapids_" + "_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]])
|
||||
apps_features = pd.DataFrame(columns=["local_segment"] + ["apps_rapids_" + x for x in ["".join(feature) for feature in itertools.product(requested_features, single_categories + multiple_categories + single_apps)]])
|
||||
if not apps_data.empty:
|
||||
# deep copy the apps_data for the top1global computation
|
||||
apps_data_global = apps_data.copy()
|
||||
|
|
|
@ -1,63 +0,0 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import scipy.stats as stats
|
||||
from features_utils import splitOvernightEpisodes, splitMultiSegmentEpisodes
|
||||
|
||||
def base_ar_features(ar_data, ar_deltas, day_segment, requested_features):
|
||||
# name of the features this function can compute
|
||||
base_features_names = ["count","mostcommonactivity","countuniqueactivities","activitychangecount","sumstationary","summobile","sumvehicle"]
|
||||
# the subset of requested features this function can compute
|
||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||
|
||||
ar_features = pd.DataFrame(columns = ["local_date"] + ["ar_" + day_segment + "_" + x for x in features_to_compute])
|
||||
if not ar_data.empty:
|
||||
ar_deltas = splitOvernightEpisodes(ar_deltas, [],["activity"])
|
||||
|
||||
if day_segment != "daily":
|
||||
ar_deltas = splitMultiSegmentEpisodes(ar_deltas, day_segment, [])
|
||||
|
||||
ar_data.local_date_time = pd.to_datetime(ar_data.local_date_time)
|
||||
resampledData = ar_data.set_index(ar_data.local_date_time)
|
||||
resampledData.drop(columns=["local_date_time"], inplace=True)
|
||||
|
||||
if day_segment != "daily":
|
||||
resampledData = resampledData.loc[resampledData["local_day_segment"] == day_segment]
|
||||
|
||||
if not resampledData.empty:
|
||||
ar_features = pd.DataFrame()
|
||||
|
||||
# finding the count of samples of the day
|
||||
if "count" in features_to_compute:
|
||||
ar_features["ar_" + day_segment + "_count"] = resampledData["activity_type"].resample("D").count()
|
||||
|
||||
# finding most common activity of the day
|
||||
if "mostcommonactivity" in features_to_compute:
|
||||
ar_features["ar_" + day_segment + "_mostcommonactivity"] = resampledData["activity_type"].resample("D").apply(lambda x: stats.mode(x)[0] if len(stats.mode(x)[0]) != 0 else None)
|
||||
|
||||
# finding different number of activities during a day
|
||||
if "countuniqueactivities" in features_to_compute:
|
||||
ar_features["ar_" + day_segment + "_countuniqueactivities"] = resampledData["activity_type"].resample("D").nunique()
|
||||
|
||||
# finding Number of times activity changed
|
||||
if "activitychangecount" in features_to_compute:
|
||||
resampledData["activity_type_shift"] = resampledData["activity_type"].shift().fillna(resampledData["activity_type"].head(1))
|
||||
resampledData["different_activity"] = np.where(resampledData["activity_type"]!=resampledData["activity_type_shift"],1,0)
|
||||
ar_features["ar_" + day_segment + "_activitychangecount"] = resampledData["different_activity"].resample("D").sum()
|
||||
|
||||
|
||||
deltas_features = {"sumstationary":["still","tilting"],
|
||||
"summobile":["on_foot","walking","running","on_bicycle"],
|
||||
"sumvehicle":["in_vehicle"]}
|
||||
|
||||
for column, activity_labels in deltas_features.items():
|
||||
if column in features_to_compute:
|
||||
filtered_data = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))]
|
||||
if not filtered_data.empty:
|
||||
ar_features["ar_" + day_segment + "_" + column] = ar_deltas[ar_deltas["activity"].isin(pd.Series(activity_labels))].groupby(["local_start_date"])["time_diff"].sum().fillna(0)
|
||||
else:
|
||||
ar_features["ar_" + day_segment + "_" + column] = 0
|
||||
|
||||
ar_features.index.names = ["local_date"]
|
||||
ar_features = ar_features.reset_index()
|
||||
|
||||
return ar_features
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["battery_episodes"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "battery", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
battery_episodes_file = snakemake.input["battery_episodes"]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "battery", battery_episodes_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,8 +1,9 @@
|
|||
import pandas as pd
|
||||
from datetime import datetime, timedelta, time
|
||||
|
||||
def rapids_features(battery_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
battery_data = pd.read_csv(sensor_data_files["sensor_episodes"])
|
||||
chunk_episodes = kwargs["chunk_episodes"]
|
||||
|
||||
# name of the features this function can compute
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
import pandas as pd
|
||||
from battery.battery_base import base_battery_features
|
||||
|
||||
battery_data = pd.read_csv(snakemake.input[0], parse_dates=["local_start_date_time", "local_end_date_time", "local_start_date", "local_end_date"])
|
||||
day_segment = snakemake.params["day_segment"]
|
||||
requested_features = snakemake.params["features"]
|
||||
battery_features = pd.DataFrame(columns=["local_date"])
|
||||
|
||||
battery_features = battery_features.merge(base_battery_features(battery_data, day_segment, requested_features), on="local_date", how="outer")
|
||||
|
||||
assert len(requested_features) + 1 == battery_features.shape[1], "The number of features in the output dataframe (=" + str(battery_features.shape[1]) + ") does not match the expected value (=" + str(len(requested_features)) + " + 1). Verify your battery feature extraction functions"
|
||||
|
||||
battery_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "bluetooth", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "bluetooth", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -27,24 +27,26 @@ compute_bluetooth_feature <- function(data, feature, day_segment){
|
|||
}
|
||||
}
|
||||
|
||||
rapids_features <- function(bluetooth_data, day_segment, provider){
|
||||
requested_features <- provider[["FEATURES"]]
|
||||
|
||||
# Output dataframe
|
||||
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
rapids_features <- function(sensor_data_files, day_segment, provider){
|
||||
|
||||
bluetooth_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
requested_features <- provider[["FEATURES"]]
|
||||
|
||||
# Output dataframe
|
||||
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
|
||||
# The name of the features this function can compute
|
||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||
# The name of the features this function can compute
|
||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||
|
||||
# The subset of requested features this function can compute
|
||||
features_to_compute <- intersect(base_features_names, requested_features)
|
||||
# The subset of requested features this function can compute
|
||||
features_to_compute <- intersect(base_features_names, requested_features)
|
||||
|
||||
for(feature_name in features_to_compute){
|
||||
feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||
}
|
||||
for(feature_name in features_to_compute){
|
||||
feature <- compute_bluetooth_feature(bluetooth_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||
}
|
||||
|
||||
features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
|
||||
features <- features %>% mutate_at(vars(contains("countscansmostuniquedevice")), list( ~ replace_na(., 0)))
|
||||
|
||||
return(features)
|
||||
return(features)
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "calls", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -62,8 +62,9 @@ call_features_of_type <- function(calls, call_type, day_segment, requested_featu
|
|||
return(features)
|
||||
}
|
||||
|
||||
rapids_features <- function(calls, day_segment, provider){
|
||||
calls <- calls %>% filter_data_by_segment(day_segment)
|
||||
rapids_features <- function(sensor_data_files, day_segment, provider){
|
||||
calls_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
calls_data <- calls_data %>% filter_data_by_segment(day_segment)
|
||||
call_types = provider[["CALL_TYPES"]]
|
||||
call_features <- setNames(data.frame(matrix(ncol=1, nrow=0)), c("local_segment"))
|
||||
|
||||
|
@ -74,7 +75,7 @@ rapids_features <- function(calls, day_segment, provider){
|
|||
stop(paste("Call type can online be incoming, outgoing or missed but instead you typed: ", call_type, " in config[CALLS][CALL_TYPES]"))
|
||||
|
||||
requested_features <- provider[["FEATURES"]][[call_type]]
|
||||
calls_of_type <- calls %>% filter(call_type == call_type_label)
|
||||
calls_of_type <- calls_data %>% filter(call_type == call_type_label)
|
||||
|
||||
features <- call_features_of_type(calls_of_type, call_type, day_segment, requested_features)
|
||||
call_features <- merge(call_features, features, all=TRUE)
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "conversation", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,8 +1,9 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
# def rapids_features(conversation_data, day_segment, requested_features,recordingMinutes,pausedMinutes,expectedMinutes):
|
||||
def rapids_features(conversation_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
conversation_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||
|
||||
requested_features = provider["FEATURES"]
|
||||
recordingMinutes = provider["RECORDING_MINUTES"]
|
||||
|
@ -20,7 +21,7 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
|
|||
# the subset of requested features this function can compute
|
||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||
|
||||
conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids" + "_" + x for x in features_to_compute])
|
||||
conversation_features = pd.DataFrame(columns=["local_segment"] + ["conversation_rapids_" + x for x in features_to_compute])
|
||||
if not conversation_data.empty:
|
||||
conversation_data = filter_data_by_segment(conversation_data, day_segment)
|
||||
|
||||
|
@ -30,19 +31,19 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
|
|||
conversation_data = conversation_data.drop_duplicates(subset=["local_date", "local_time"], keep="first")
|
||||
|
||||
if "minutessilence" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
|
||||
conversation_features["conversation_rapids_minutessilence"] = conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60
|
||||
|
||||
if "minutesnoise" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
|
||||
conversation_features["conversation_rapids_minutesnoise"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60
|
||||
|
||||
if "minutesvoice" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
|
||||
conversation_features["conversation_rapids_minutesvoice"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60
|
||||
|
||||
if "minutesunknown" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
|
||||
conversation_features["conversation_rapids_minutesunknown"] = conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60
|
||||
|
||||
if "countconversation" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
|
||||
conversation_features["conversation_rapids_countconversation"] = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['double_convo_start'].nunique()
|
||||
|
||||
conv_duration = (conversation_data['double_convo_end']/1000 - conversation_data['double_convo_start']/1000)/60
|
||||
conversation_data = conversation_data.assign(conv_duration = conv_duration.values)
|
||||
|
@ -50,43 +51,43 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
|
|||
conv_totalDuration = conversation_data[(conversation_data['inference'] >= 0) & (conversation_data['inference'] < 4)].groupby(["local_segment"])['inference'].count()/60
|
||||
|
||||
if "silencesensedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
conversation_features["conversation_rapids_silencesensedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
|
||||
if "noisesensedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
conversation_features["conversation_rapids_noisesensedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
|
||||
if "voicesensedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
conversation_features["conversation_rapids_voicesensedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
|
||||
if "unknownsensedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
conversation_features["conversation_rapids_unknownsensedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ conv_totalDuration
|
||||
|
||||
if "silenceexpectedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
conversation_features["conversation_rapids_silenceexpectedfraction"] = (conversation_data[conversation_data['inference']==0].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
|
||||
if "noiseexpectedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
conversation_features["conversation_rapids_noiseexpectedfraction"] = (conversation_data[conversation_data['inference']==1].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
|
||||
if "voiceexpectedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
conversation_features["conversation_rapids_voiceexpectedfraction"] = (conversation_data[conversation_data['inference']==2].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
|
||||
if "unknownexpectedfraction" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
conversation_features["conversation_rapids_unknownexpectedfraction"] = (conversation_data[conversation_data['inference']==3].groupby(["local_segment"])['inference'].count()/60)/ expectedMinutes
|
||||
|
||||
if "sumconversationduration" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
|
||||
conversation_features["conversation_rapids_sumconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].sum()
|
||||
|
||||
if "avgconversationduration" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
|
||||
conversation_features["conversation_rapids_avgconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].mean()
|
||||
|
||||
if "sdconversationduration" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
|
||||
conversation_features["conversation_rapids_sdconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].std()
|
||||
|
||||
if "minconversationduration" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
|
||||
conversation_features["conversation_rapids_minconversationduration"] = conversation_data[conversation_data["conv_duration"] > 0].groupby(["local_segment"])["conv_duration"].min()
|
||||
|
||||
if "maxconversationduration" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
|
||||
conversation_features["conversation_rapids_maxconversationduration"] = conversation_data.groupby(["local_segment"])["conv_duration"].max()
|
||||
|
||||
if "timefirstconversation" in features_to_compute:
|
||||
timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].min()
|
||||
|
@ -94,9 +95,9 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
|
|||
for date in list(timestampsLastConversation.index):
|
||||
lastimestamp = timestampsLastConversation.loc[date]
|
||||
lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
|
||||
conversation_features.loc[date,"conversation_rapids" + "_timefirstconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
|
||||
conversation_features.loc[date,"conversation_rapids_timefirstconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
|
||||
else:
|
||||
conversation_features["conversation_rapids" + "_timefirstconversation"] = np.nan
|
||||
conversation_features["conversation_rapids_timefirstconversation"] = np.nan
|
||||
|
||||
if "timelastconversation" in features_to_compute:
|
||||
timestampsLastConversation = conversation_data[conversation_data["double_convo_start"] > 0].groupby(["local_segment"])['timestamp'].max()
|
||||
|
@ -104,39 +105,39 @@ def rapids_features(conversation_data, day_segment, provider, filter_data_by_seg
|
|||
for date in list(timestampsLastConversation.index):
|
||||
lastimestamp = timestampsLastConversation.loc[date]
|
||||
lasttime = (conversation_data.query('timestamp == @lastimestamp', inplace = False))['local_time'].iat[0]
|
||||
conversation_features.loc[date,"conversation_rapids" + "_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
|
||||
conversation_features.loc[date,"conversation_rapids_timelastconversation"] = int(lasttime.split(':')[0])*60 + int(lasttime.split(':')[1])
|
||||
else:
|
||||
conversation_features["conversation_rapids" + "_timelastconversation"] = np.nan
|
||||
conversation_features["conversation_rapids_timelastconversation"] = np.nan
|
||||
|
||||
if "noisesumenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noisesumenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].sum()
|
||||
conversation_features["conversation_rapids_noisesumenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].sum()
|
||||
|
||||
if "noiseavgenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noiseavgenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].mean()
|
||||
conversation_features["conversation_rapids_noiseavgenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].mean()
|
||||
|
||||
if "noisesdenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noisesdenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].std()
|
||||
conversation_features["conversation_rapids_noisesdenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].std()
|
||||
|
||||
if "noiseminenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noiseminenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].min()
|
||||
conversation_features["conversation_rapids_noiseminenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].min()
|
||||
|
||||
if "noisemaxenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_noisemaxenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].max()
|
||||
conversation_features["conversation_rapids_noisemaxenergy"] = conversation_data[conversation_data['inference']==1].groupby(["local_segment"])["double_energy"].max()
|
||||
|
||||
if "voicesumenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voicesumenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].sum()
|
||||
conversation_features["conversation_rapids_voicesumenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].sum()
|
||||
|
||||
if "voiceavgenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voiceavgenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].mean()
|
||||
conversation_features["conversation_rapids_voiceavgenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].mean()
|
||||
|
||||
if "voicesdenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voicesdenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].std()
|
||||
conversation_features["conversation_rapids_voicesdenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].std()
|
||||
|
||||
if "voiceminenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voiceminenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].min()
|
||||
conversation_features["conversation_rapids_voiceminenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].min()
|
||||
|
||||
if "voicemaxenergy" in features_to_compute:
|
||||
conversation_features["conversation_rapids" + "_voicemaxenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].max()
|
||||
conversation_features["conversation_rapids_voicemaxenergy"] = conversation_data[conversation_data['inference']==2].groupby(["local_segment"])["double_energy"].max()
|
||||
|
||||
|
||||
conversation_features = conversation_features.reset_index()
|
||||
|
|
|
@ -3,11 +3,14 @@ source("src/features/utils/utils.R")
|
|||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
sensor_data_files <- snakemake@input
|
||||
sensor_data_files$day_segments_labels <- NULL
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
sensor_key <- snakemake@params["sensor_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "calls", sensor_data_file, day_segments_file)
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -0,0 +1,14 @@
|
|||
import pandas as pd
|
||||
from utils.utils import fetch_provider_features
|
||||
|
||||
sensor_data_files = dict(snakemake.input)
|
||||
del sensor_data_files["day_segments_labels"]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
sensor_key = snakemake.params["sensor_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "light", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,33 +1,35 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def rapids_features(light_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
light_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||
requested_features = provider["FEATURES"]
|
||||
# name of the features this function can compute
|
||||
base_features_names = ["count", "maxlux", "minlux", "avglux", "medianlux", "stdlux"]
|
||||
# the subset of requested features this function can compute
|
||||
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||
|
||||
light_features = pd.DataFrame(columns=["local_segment"] + ["light_rapids_" + "_" + x for x in features_to_compute])
|
||||
light_features = pd.DataFrame(columns=["local_segment"] + ["light_rapids_" + x for x in features_to_compute])
|
||||
if not light_data.empty:
|
||||
light_data = filter_data_by_segment(light_data, day_segment)
|
||||
|
||||
if not light_data.empty:
|
||||
light_features = pd.DataFrame()
|
||||
if "count" in features_to_compute:
|
||||
light_features["light_rapids_" + "_count"] = light_data.groupby(["local_segment"]).count()["timestamp"]
|
||||
light_features["light_rapids_count"] = light_data.groupby(["local_segment"]).count()["timestamp"]
|
||||
|
||||
# get light ambient luminance related features
|
||||
if "maxlux" in features_to_compute:
|
||||
light_features["light_rapids_" + "_maxlux"] = light_data.groupby(["local_segment"])["double_light_lux"].max()
|
||||
light_features["light_rapids_maxlux"] = light_data.groupby(["local_segment"])["double_light_lux"].max()
|
||||
if "minlux" in features_to_compute:
|
||||
light_features["light_rapids_" + "_minlux"] = light_data.groupby(["local_segment"])["double_light_lux"].min()
|
||||
light_features["light_rapids_minlux"] = light_data.groupby(["local_segment"])["double_light_lux"].min()
|
||||
if "avglux" in features_to_compute:
|
||||
light_features["light_rapids_" + "_avglux"] = light_data.groupby(["local_segment"])["double_light_lux"].mean()
|
||||
light_features["light_rapids_avglux"] = light_data.groupby(["local_segment"])["double_light_lux"].mean()
|
||||
if "medianlux" in features_to_compute:
|
||||
light_features["light_rapids_" + "_medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
|
||||
light_features["light_rapids_medianlux"] = light_data.groupby(["local_segment"])["double_light_lux"].median()
|
||||
if "stdlux" in features_to_compute:
|
||||
light_features["light_rapids_" + "_stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std()
|
||||
light_features["light_rapids_stdlux"] = light_data.groupby(["local_segment"])["double_light_lux"].std()
|
||||
|
||||
light_features = light_features.reset_index()
|
||||
|
||||
|
|
|
@ -27,8 +27,11 @@ create_empty_file <- function(requested_features){
|
|||
) %>% select(all_of(requested_features)))
|
||||
}
|
||||
|
||||
barnett_features <- function(location_data, day_segment, params){
|
||||
barnett_features <- function(sensor_data_files, day_segment, params){
|
||||
|
||||
location_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
location_features <- NULL
|
||||
|
||||
location <- location_data
|
||||
accuracy_limit <- params[["ACCURACY_LIMIT"]]
|
||||
timezone <- params[["TIMEZONE"]]
|
||||
|
|
|
@ -4,7 +4,9 @@ from astropy.timeseries import LombScargle
|
|||
from sklearn.cluster import DBSCAN
|
||||
from math import radians, cos, sin, asin, sqrt
|
||||
|
||||
def doryab_features(location_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
def doryab_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
location_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||
requested_features = provider["FEATURES"]
|
||||
dbscan_eps = provider["DBSCAN_EPS"]
|
||||
dbscan_minsamples = provider["DBSCAN_MINSAMPLES"]
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "locations", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "locations", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "messages", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "messages", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -50,8 +50,9 @@ message_features_of_type <- function(messages, messages_type, day_segment, reque
|
|||
return(features)
|
||||
}
|
||||
|
||||
rapids_features <- function(messages, day_segment, provider){
|
||||
messages <- messages %>% filter_data_by_segment(day_segment)
|
||||
rapids_features <- function(sensor_data_files, day_segment, provider){
|
||||
messages_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
messages_data <- messages_data %>% filter_data_by_segment(day_segment)
|
||||
messages_types = provider[["MESSAGES_TYPES"]]
|
||||
messages_features <- setNames(data.frame(matrix(ncol=1, nrow=0)), c("local_segment"))
|
||||
|
||||
|
@ -62,7 +63,7 @@ rapids_features <- function(messages, day_segment, provider){
|
|||
stop(paste("Message type can online be received or sent but instead you typed: ", message_type, " in config[MESSAGES][MESSAGES_TYPES]"))
|
||||
|
||||
requested_features <- provider[["FEATURES"]][[message_type]]
|
||||
messages_of_type <- messages %>% filter(message_type == message_type_label)
|
||||
messages_of_type <- messages_data %>% filter(message_type == message_type_label)
|
||||
|
||||
features <- message_features_of_type(messages_of_type, message_type, day_segment, requested_features)
|
||||
messages_features <- merge(messages_features, features, all=TRUE)
|
||||
|
|
|
@ -25,7 +25,9 @@ def getEpisodeDurationFeatures(screen_data, day_segment, episode, features, refe
|
|||
return duration_helper
|
||||
|
||||
|
||||
def rapids_features(screen_data, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
def rapids_features(sensor_data_files, day_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||
|
||||
screen_data = pd.read_csv(sensor_data_files["sensor_episodes"])
|
||||
|
||||
reference_hour_first_use = provider["REFERENCE_HOUR_FIRST_USE"]
|
||||
requested_features_episodes = provider["FEATURES"]
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["screen_episodes"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "screen", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
screen_episodes_file = snakemake.input["screen_episodes"]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "screen", screen_episodes_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
|
@ -43,24 +43,23 @@ chunk_episodes <- function(sensor_episodes){
|
|||
return(chunked_episodes)
|
||||
}
|
||||
|
||||
fetch_provider_features <- function(provider, provider_key, config_key, sensor_data_file, day_segments_file){
|
||||
fetch_provider_features <- function(provider, provider_key, sensor_key, sensor_data_files, day_segments_file){
|
||||
sensor_features <- data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
|
||||
sensor_data <- read.csv(sensor_data_file, stringsAsFactors = FALSE)
|
||||
day_segments_labels <- read.csv(day_segments_file, stringsAsFactors = FALSE)
|
||||
|
||||
if(!"FEATURES" %in% names(provider))
|
||||
stop(paste0("Provider config[", config_key,"][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
|
||||
stop(paste0("Provider config[", sensor_key,"][PROVIDERS][", provider_key,"] is missing a FEATURES attribute in config.yaml"))
|
||||
|
||||
if(provider[["COMPUTE"]] == TRUE){
|
||||
code_path <- paste0("src/features/", config_key,"/", provider[["SRC_FOLDER"]], "/main.R")
|
||||
code_path <- paste0("src/features/", sensor_key,"/", provider[["SRC_FOLDER"]], "/main.R")
|
||||
source(code_path)
|
||||
features_function <- match.fun(paste0(provider[["SRC_FOLDER"]], "_features"))
|
||||
day_segments <- day_segments_labels %>% pull(label)
|
||||
for (day_segment in day_segments){
|
||||
print(paste(rapids_log_tag,"Processing", config_key, provider_key, day_segment))
|
||||
print(paste(rapids_log_tag,"Processing", sensor_key, provider_key, day_segment))
|
||||
|
||||
features <- features_function(sensor_data, day_segment, provider)
|
||||
features <- features_function(sensor_data_files, day_segment, provider)
|
||||
|
||||
# Check all features names contain the provider key so they are unique
|
||||
features_names <- colnames(features %>% select(-local_segment))
|
||||
|
|
|
@ -67,24 +67,24 @@ def chunk_episodes(sensor_episodes):
|
|||
|
||||
return merged_sensor_episodes
|
||||
|
||||
def fetch_provider_features(provider, provider_key, config_key, sensor_data_file, day_segments_file):
|
||||
def fetch_provider_features(provider, provider_key, sensor_key, sensor_data_files, day_segments_file):
|
||||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
|
||||
sensor_features = pd.DataFrame(columns=["local_segment"])
|
||||
sensor_data = pd.read_csv(sensor_data_file)
|
||||
day_segments_labels = pd.read_csv(day_segments_file, header=0)
|
||||
if "FEATURES" not in provider:
|
||||
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(config_key.upper(), provider_key))
|
||||
raise ValueError("Provider config[{}][PROVIDERS][{}] is missing a FEATURES attribute in config.yaml".format(sensor_key.upper(), provider_key))
|
||||
|
||||
if provider["COMPUTE"] == True:
|
||||
code_path = provider["SRC_FOLDER"] + ".main"
|
||||
|
||||
code_path = sensor_key + "." + provider["SRC_FOLDER"] + ".main"
|
||||
feature_module = import_module(code_path)
|
||||
feature_function = getattr(feature_module, provider["SRC_FOLDER"] + "_features")
|
||||
|
||||
for day_segment in day_segments_labels["label"]:
|
||||
print("{} Processing {} {} {}".format(rapids_log_tag, config_key, provider_key, day_segment))
|
||||
features = feature_function(sensor_data, day_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
|
||||
print("{} Processing {} {} {}".format(rapids_log_tag, sensor_key, provider_key, day_segment))
|
||||
features = feature_function(sensor_data_files, day_segment, provider, filter_data_by_segment=filter_data_by_segment, chunk_episodes=chunk_episodes)
|
||||
sensor_features = sensor_features.merge(features, how="outer")
|
||||
else:
|
||||
for feature in provider["FEATURES"]:
|
||||
|
|
|
@ -25,21 +25,22 @@ compute_wifi_feature <- function(data, feature, day_segment){
|
|||
}
|
||||
}
|
||||
|
||||
rapids_features <- function(wifi_data, day_segment, provider){
|
||||
requested_features <- provider[["FEATURES"]]
|
||||
# Output dataframe
|
||||
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
rapids_features <- function(sensor_data_files, day_segment, provider){
|
||||
wifi_data <- read.csv(sensor_data_files[["sensor_data"]], stringsAsFactors = FALSE)
|
||||
requested_features <- provider[["FEATURES"]]
|
||||
# Output dataframe
|
||||
features = data.frame(local_segment = character(), stringsAsFactors = FALSE)
|
||||
|
||||
# The name of the features this function can compute
|
||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||
# The name of the features this function can compute
|
||||
base_features_names <- c("countscans", "uniquedevices", "countscansmostuniquedevice")
|
||||
|
||||
# The subset of requested features this function can compute
|
||||
features_to_compute <- intersect(base_features_names, requested_features)
|
||||
# The subset of requested features this function can compute
|
||||
features_to_compute <- intersect(base_features_names, requested_features)
|
||||
|
||||
for(feature_name in features_to_compute){
|
||||
feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||
}
|
||||
for(feature_name in features_to_compute){
|
||||
feature <- compute_wifi_feature(wifi_data, feature_name, day_segment)
|
||||
features <- merge(features, feature, by="local_segment", all = TRUE)
|
||||
}
|
||||
|
||||
return(features)
|
||||
return(features)
|
||||
}
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
source("renv/activate.R")
|
||||
source("src/features/utils/utils.R")
|
||||
library("dplyr")
|
||||
library("tidyr")
|
||||
|
||||
sensor_data_file <- snakemake@input[["sensor_data"]]
|
||||
day_segments_file <- snakemake@input[["day_segments_labels"]]
|
||||
provider <- snakemake@params["provider"][["provider"]]
|
||||
provider_key <- snakemake@params["provider_key"]
|
||||
|
||||
sensor_features <- fetch_provider_features(provider, provider_key, "wifi", sensor_data_file, day_segments_file)
|
||||
|
||||
write.csv(sensor_features, snakemake@output[[1]], row.names = FALSE)
|
|
@ -1,18 +0,0 @@
|
|||
import pandas as pd
|
||||
from importlib import import_module, util
|
||||
from pathlib import Path
|
||||
|
||||
# import fetch_provider_features from src/features/utils/utils.py
|
||||
spec = util.spec_from_file_location("util", str(Path(snakemake.scriptdir).parent / "utils" / "utils.py"))
|
||||
mod = util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
fetch_provider_features = getattr(mod, "fetch_provider_features")
|
||||
|
||||
sensor_data_file = snakemake.input["sensor_data"][0]
|
||||
day_segments_file = snakemake.input["day_segments_labels"]
|
||||
provider = snakemake.params["provider"]
|
||||
provider_key = snakemake.params["provider_key"]
|
||||
|
||||
sensor_features = fetch_provider_features(provider, provider_key, "wifi", sensor_data_file, day_segments_file)
|
||||
|
||||
sensor_features.to_csv(snakemake.output[0], index=False)
|
Loading…
Reference in New Issue