Compare commits
1 Commits
master
...
feature/pl
Author | SHA1 | Date |
---|---|---|
JulioV | 09ca9725c0 |
|
@ -218,6 +218,15 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
|
||||||
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
|
for provider in config["PHONE_PLUGIN_SENTIMENTAL"]["PROVIDERS"].keys():
|
||||||
|
if config["PHONE_PLUGIN_SENTIMENTAL"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/phone_plugin_sentimental_raw.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/raw/{pid}/phone_plugin_sentimental_with_datetime.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.extend(expand("data/interim/{pid}/phone_plugin_sentimental_features/phone_plugin_sentimental_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["PHONE_PLUGIN_SENTIMENTAL"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/phone_plugin_sentimental.csv", pid=config["PIDS"],))
|
||||||
|
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
|
||||||
|
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
|
||||||
|
|
||||||
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
|
||||||
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
|
||||||
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
|
||||||
|
|
|
@ -282,6 +282,15 @@ PHONE_MESSAGES:
|
||||||
SRC_LANGUAGE: "r"
|
SRC_LANGUAGE: "r"
|
||||||
SRC_FOLDER: "rapids" # inside src/features/phone_messages
|
SRC_FOLDER: "rapids" # inside src/features/phone_messages
|
||||||
|
|
||||||
|
PHONE_PLUGIN_SENTIMENTAL:
|
||||||
|
TABLE: plugin_sentimental_study_data
|
||||||
|
PROVIDERS:
|
||||||
|
WWBP:
|
||||||
|
COMPUTE: False
|
||||||
|
FEATURES: []
|
||||||
|
SRC_FOLDER: "wwbp"
|
||||||
|
SRC_LANGUAGE: "python"
|
||||||
|
|
||||||
# See https://www.rapids.science/latest/features/phone-screen/
|
# See https://www.rapids.science/latest/features/phone-screen/
|
||||||
PHONE_SCREEN:
|
PHONE_SCREEN:
|
||||||
TABLE: screen
|
TABLE: screen
|
||||||
|
|
|
@ -418,6 +418,32 @@ rule phone_messages_r_features:
|
||||||
script:
|
script:
|
||||||
"../src/features/entry.R"
|
"../src/features/entry.R"
|
||||||
|
|
||||||
|
rule phone_plugin_sentimental_python_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/raw/{pid}/phone_plugin_sentimental_with_datetime.csv",
|
||||||
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["PHONE_PLUGIN_SENTIMENTAL"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "phone_plugin_sentimental"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_plugin_sentimental_features/phone_plugin_sentimental_python_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.py"
|
||||||
|
|
||||||
|
rule phone_plugin_sentimental_r_features:
|
||||||
|
input:
|
||||||
|
sensor_data = "data/raw/{pid}/plugin_sentimental_with_datetime.csv",
|
||||||
|
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
|
||||||
|
params:
|
||||||
|
provider = lambda wildcards: config["PHONE_PLUGIN_SENTIMENTAL"]["PROVIDERS"][wildcards.provider_key.upper()],
|
||||||
|
provider_key = "{provider_key}",
|
||||||
|
sensor_key = "phone_plugin_sentimental"
|
||||||
|
output:
|
||||||
|
"data/interim/{pid}/phone_plugin_sentimental_features/phone_plugin_sentimental_r_{provider_key}.csv"
|
||||||
|
script:
|
||||||
|
"../src/features/entry.R"
|
||||||
|
|
||||||
rule screen_episodes:
|
rule screen_episodes:
|
||||||
input:
|
input:
|
||||||
screen = "data/raw/{pid}/phone_screen_with_datetime_unified.csv"
|
screen = "data/raw/{pid}/phone_screen_with_datetime_unified.csv"
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def wwbp_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
    """Extract WWBP sentiment features, one row per local time segment.

    Reads the raw sentiment CSV, restricts it to the requested time
    segment, derives the set of features to compute (either plain word
    categories, or (app, category) pairs when "app" is listed in the
    provider's FEATURES), and aggregates each segment into one row.
    Returns an empty frame with only a ``local_segment`` column when
    there is no data for the segment.
    """
    sentiment_data = pd.read_csv(sensor_data_files["sensor_data"])
    sentiment_features = pd.DataFrame(columns=["local_segment"])

    # Per-app breakdown is requested via the "app" flag in FEATURES.
    per_app = "app" in provider["FEATURES"]

    if sentiment_data.empty:
        return sentiment_features

    sentiment_data = filter_data_by_segment(sentiment_data, time_segment)
    if sentiment_data.empty:
        return sentiment_features

    if per_app:
        # Every observed (app_name, word_category) pair, minus the
        # bookkeeping 'total_words' rows; t[1] == t[1] is False only
        # for NaN categories, which are dropped too.
        pairs = sentiment_data.drop(
            sentiment_data.columns.difference(['app_name', 'word_category']), axis=1)
        observed = list(pairs.groupby(['app_name', 'word_category']).groups)
        categories = [t for t in observed if t[1] != 'total_words' and t[1] == t[1]]
    else:
        # Plain list of word categories, minus the normalizer row.
        categories = sentiment_data.word_category.unique().tolist()
        categories.remove('total_words')

    # Collapse each local segment into a single feature row.
    rows = [process_local_segment(segment_df, categories, per_app)
            for _, segment_df in sentiment_data.groupby("local_segment")]

    sentiment_features = pd.concat(rows)
    return sentiment_features
||||||
|
|
||||||
|
|
||||||
|
# Handles local segment logic and processing
|
||||||
|
# Handles local segment logic and processing
def process_local_segment(df, categories, app_included):
    """Collapse one local segment's rows into a single feature row.

    When ``app_included`` is True, features are computed per app and the
    per-app rows are summed into one combined row for the segment;
    otherwise a single pass over the whole segment suffices.
    """
    if app_included:
        # One feature row per app, then sum them by segment.
        per_app_rows = [insert_features(group, categories, True)
                        for _, group in df.groupby(['app_name'])]
        features_df = (pd.concat(per_app_rows)
                         .groupby(['local_segment'], as_index=False)
                         .sum())
    else:
        # Simply add the relevant features in one pass.
        features_df = insert_features(df, categories, False)

    # Tag the row with the segment's device id and return it.
    features_df['device_id'] = df['device_id'].values[0]
    return features_df
|
||||||
|
|
||||||
|
|
||||||
|
# Calculates features in a particular local segment
|
||||||
|
# Calculates features in a particular local segment
def insert_features(df, categories, app_included=False):
    """Build a one-row feature DataFrame from a group of sentiment rows.

    Each requested category's summed ``double_sentiment_score`` is
    normalized by the group's 'total_words' score; categories absent
    from the group score 0.

    Args:
        df: rows of one local segment (or one app within a segment);
            must contain 'word_category', 'double_sentiment_score',
            'local_segment', and — when app_included — 'app_name'.
        categories: word categories to emit; when app_included, tuples
            of (app_name, word_category).
        app_included: whether features are broken down per app.

    Returns:
        A single-row DataFrame with one column per feature plus
        'local_segment'.
    """
    # Total sentiment score per word category in this group.
    # FIX: replaces a DataFrame + iterrows loop with the idiomatic
    # Series.to_dict() — identical mapping, one vectorized pass.
    category_to_score = (
        df.groupby("word_category")['double_sentiment_score'].sum().to_dict())

    # 'total_words' is a bookkeeping category used only as normalizer;
    # remove it so it is never emitted as a feature.
    total_words = category_to_score.pop('total_words')

    data = {}
    if app_included:
        # FIX: only read app_name when it is actually needed; previously
        # the column was required even in the category-only path.
        app = df['app_name'].values[0]
        for tuple_app, tuple_cat in categories:
            # Normalized score when the pair belongs to this app and
            # was observed; 0 otherwise.
            feature = tuple_cat + "_" + tuple_app
            if tuple_app == app and tuple_cat in category_to_score:
                data[feature] = category_to_score[tuple_cat] / total_words
            else:
                data[feature] = 0
    else:
        for c in categories:
            # Normalized score when the category was observed; 0 otherwise.
            data[c] = category_to_score[c] / total_words if c in category_to_score else 0

    # Attach the segment label and wrap everything as one row.
    data['local_segment'] = df['local_segment'].values[0]
    processed_df = pd.DataFrame([data])
    return processed_df
|
||||||
|
|
Loading…
Reference in New Issue