Merge branch 'speech_sensor'
commit
5958948af2
|
@ -174,6 +174,15 @@ for provider in config["PHONE_ESM"]["PROVIDERS"].keys():
|
||||||
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
|
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv",pid=config["PIDS"]))
|
||||||
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
|
for provider in config["PHONE_SPEECH"]["PROVIDERS"].keys():
|
||||||
|
if config["PHONE_SPEECH"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/phone_speech_raw.csv",pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/phone_speech_with_datetime.csv",pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/phone_speech_features/phone_speech_{language}_{provider_key}.csv",pid=config["PIDS"],language=get_script_language(config["PHONE_SPEECH"]["PROVIDERS"][provider]["SRC_SCRIPT"]),provider_key=provider.lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_speech.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
# We can delete these if's as soon as we add feature PROVIDERS to any of these sensors
|
||||||
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
if isinstance(config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"], dict):
|
||||||
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
for provider in config["PHONE_APPLICATIONS_CRASHES"]["PROVIDERS"].keys():
|
||||||
|
|
|
@ -329,6 +329,15 @@ PHONE_SCREEN:
|
||||||
EPISODE_TYPES: ["unlock"]
|
EPISODE_TYPES: ["unlock"]
|
||||||
SRC_SCRIPT: src/features/phone_screen/rapids/main.py
|
SRC_SCRIPT: src/features/phone_screen/rapids/main.py
|
||||||
|
|
||||||
|
# Custom added sensor
|
||||||
|
PHONE_SPEECH:
|
||||||
|
CONTAINER: speech
|
||||||
|
PROVIDERS:
|
||||||
|
STRAW:
|
||||||
|
COMPUTE: True
|
||||||
|
FEATURES: ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
|
||||||
|
SRC_SCRIPT: src/features/phone_speech/straw/main.py
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/phone-wifi-connected/
|
# See https://www.rapids.science/latest/features/phone-wifi-connected/
|
||||||
PHONE_WIFI_CONNECTED:
|
PHONE_WIFI_CONNECTED:
|
||||||
CONTAINER: sensor_wifi
|
CONTAINER: sensor_wifi
|
||||||
|
|
|
@ -345,6 +345,19 @@ rule esm_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.py"
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
rule phone_speech_python_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/raw/{pid}/phone_speech_with_datetime.csv",
|
||||||
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["PHONE_SPEECH"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "phone_speech"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_speech_features/phone_speech_python_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.py"
|
||||||
|
|
||||||
rule phone_keyboard_python_features:
|
rule phone_keyboard_python_features:
|
||||||
input:
|
input:
|
||||||
sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv",
|
sensor_data = "data/raw/{pid}/phone_keyboard_with_datetime.csv",
|
||||||
|
|
|
@ -349,3 +349,24 @@ PHONE_WIFI_VISIBLE:
|
||||||
COLUMN_MAPPINGS:
|
COLUMN_MAPPINGS:
|
||||||
SCRIPTS: # List any python or r scripts that mutate your raw data
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
PHONE_SPEECH:
|
||||||
|
ANDROID:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SPEECH_PROPORTION: speech_proportion
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
IOS:
|
||||||
|
RAPIDS_COLUMN_MAPPINGS:
|
||||||
|
TIMESTAMP: timestamp
|
||||||
|
DEVICE_ID: device_id
|
||||||
|
SPEECH_PROPORTION: speech_proportion
|
||||||
|
MUTATION:
|
||||||
|
COLUMN_MAPPINGS:
|
||||||
|
SCRIPTS: # List any python or r scripts that mutate your raw data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def straw_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
|
||||||
|
speech_data = pd.read_csv(sensor_data_files["sensor_data"])
|
||||||
|
requested_features = provider["FEATURES"]
|
||||||
|
# name of the features this function can compute+
|
||||||
|
base_features_names = ["meanspeech", "stdspeech", "nlargest", "nsmallest", "medianspeech"]
|
||||||
|
features_to_compute = list(set(requested_features) & set(base_features_names))
|
||||||
|
speech_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
|
||||||
|
|
||||||
|
if not speech_data.empty:
|
||||||
|
speech_data = filter_data_by_segment(speech_data, time_segment)
|
||||||
|
|
||||||
|
if not speech_data.empty:
|
||||||
|
speech_features = pd.DataFrame()
|
||||||
|
if "meanspeech" in features_to_compute:
|
||||||
|
speech_features["meanspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].mean()
|
||||||
|
if "stdspeech" in features_to_compute:
|
||||||
|
speech_features["stdspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].std()
|
||||||
|
if "nlargest" in features_to_compute:
|
||||||
|
speech_features["nlargest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nlargest(5).mean())
|
||||||
|
if "nsmallest" in features_to_compute:
|
||||||
|
speech_features["nsmallest"] = speech_data.groupby(["local_segment"])['speech_proportion'].apply(lambda x: x.nsmallest(5).mean())
|
||||||
|
if "medianspeech" in features_to_compute:
|
||||||
|
speech_features["medianspeech"] = speech_data.groupby(["local_segment"])['speech_proportion'].median()
|
||||||
|
|
||||||
|
speech_features = speech_features.reset_index()
|
||||||
|
|
||||||
|
return speech_features
|
Loading…
Reference in New Issue