2021-07-26 11:37:31 +02:00
|
|
|
from collections.abc import Collection
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
from config.models import Participant, Proximity
|
|
|
|
from setup import db_engine, session
|
|
|
|
|
2021-08-21 19:40:42 +02:00
|
|
|
# Values used to fill missing proximity features, keyed by feature name.
FILL_NA_PROXIMITY = dict(
    freq_prox_near=0,
    # The proportion has the form a / (a + b), so the neutral fill value is one half.
    prop_prox_near=1 / 2,
)

# Names of the proximity features, in the order they are declared above.
FEATURES_PROXIMITY = list(FILL_NA_PROXIMITY)
|
2021-08-17 10:51:51 +02:00
|
|
|
|
2021-07-26 11:37:31 +02:00
|
|
|
|
|
|
|
def get_proximity_data(usernames: Collection) -> pd.DataFrame:
    """
    Load proximity sensor records for the selected participants into a dataframe.

    Parameters
    ----------
    usernames: Collection
        The usernames to match in the WHERE ... IN condition of the query.

    Returns
    -------
    df_proximity: pd.DataFrame
        The proximity records, each accompanied by the username of its participant.
    """
    # Link every proximity record to the participant it belongs to.
    belongs_to_participant = Participant.id == Proximity.participant_id
    # Keep only the participants that were asked for.
    is_requested_user = Participant.username.in_(usernames)

    proximity_query = (
        session.query(Proximity, Participant.username)
        .filter(belongs_to_participant)
        .filter(is_requested_user)
    )

    with db_engine.connect() as db_connection:
        return pd.read_sql(proximity_query.statement, db_connection)
|
2021-08-11 15:04:27 +02:00
|
|
|
|
|
|
|
|
|
|
|
def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
    """
    Recode the raw (double) proximity reading into a boolean "near" flag.

    Proximity sensors differ in the values they report,
    but only a handful of distinct values have ever been found in our data.
    They are therefore collapsed into binary "near" and "far" values.
    See expl_proximity.ipynb for additional info.

    Parameters
    ----------
    df_proximity: pd.DataFrame
        A dataframe of proximity data with a double_proximity column.

    Returns
    -------
    df_proximity: pd.DataFrame
        A copy of the dataframe with an additional column bool_prox_near,
        which is True where double_proximity equals 0 ("near")
        and False otherwise ("far").
    """
    # A reading of exactly zero is what is treated as "near".
    near_mask = df_proximity.double_proximity.eq(0)
    return df_proximity.assign(bool_prox_near=near_mask)
|
2021-08-11 16:40:19 +02:00
|
|
|
|
|
|
|
|
2021-08-12 15:07:20 +02:00
|
|
|
def count_proximity(
    df_proximity: pd.DataFrame, group_by: Collection = None
) -> pd.DataFrame:
    """
    Count the "near" proximity values and calculate their relative count.

    The function counts how many times a "near" value occurs in proximity
    and calculates the proportion of these counts to all proximity values
    (i.e. the relative count). The input dataframe is left unmodified.

    Parameters
    ----------
    df_proximity: pd.DataFrame
        A dataframe of proximity data. It needs a participant_id column and
        either bool_prox_near or the raw double_proximity column, from which
        bool_prox_near is derived with recode_proximity.
    group_by: Collection
        Column names specifying by which parameters to group, in addition to
        participant_id. Any collection of strings (list, tuple, ...) or a single
        string is accepted.
        By default (None), the features are calculated per participant only,
        but the grouping could include "date_lj" etc.

    Returns
    -------
    df_proximity_features: pd.DataFrame
        A dataframe indexed by the grouping columns with two columns:
        freq_prox_near, the count of "near" proximity values,
        and prop_prox_near, their relative count.
    """
    # Normalise group_by to a list: "list + tuple" would raise a TypeError,
    # and a bare string must be treated as one column name, not as characters.
    if group_by is None:
        extra_keys = []
    elif isinstance(group_by, str):
        extra_keys = [group_by]
    else:
        extra_keys = list(group_by)
    group_keys = ["participant_id"] + extra_keys

    if "bool_prox_near" not in df_proximity:
        df_proximity = recode_proximity(df_proximity)

    # assign() returns a new dataframe, so the caller's data is never mutated.
    df_proximity = df_proximity.assign(bool_prox_far=lambda x: ~x["bool_prox_near"])

    # Select the indicator columns before summing, so unrelated columns
    # (timestamps, strings, ...) are neither aggregated nor able to raise.
    df_counts = df_proximity.groupby(group_keys)[
        ["bool_prox_near", "bool_prox_far"]
    ].sum()

    df_proximity_features = df_counts.assign(
        prop_prox_near=lambda x: x.bool_prox_near / (x.bool_prox_near + x.bool_prox_far)
    )
    df_proximity_features = df_proximity_features.rename(
        columns={"bool_prox_near": "freq_prox_near"}
    ).drop(columns="bool_prox_far")
    return df_proximity_features
|