# Source: stress_at_work_analysis/features/ambient.py
# (Repository viewer metadata: 92 lines, 3.4 KiB, Python.)

from collections.abc import Collection
import pandas as pd
from config.models import (
Barometer,
BarometerSensor,
LightSensor,
Participant,
Temperature,
TemperatureSensor,
)
from setup import db_engine, session
# Cut-off, in millibar (mbar), used by clean_pressure to discard implausibly low barometer readings.
# The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes,
# with a record low of 870 mbar (87 kPa; 26 inHg).
MINIMUM_PRESSURE_MB = 870
def get_ambient_data(usernames: Collection, sensor=None) -> pd.DataFrame:
    """
    Read the data from one of the ambient sensor tables and return it in a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames to put into the WHERE condition; only rows belonging to these participants are selected.
    sensor: str
        One of: barometer, barometer_sensor, light, temperature, temperature_sensor.
        Here, the _sensor tables describe the phone sensors, such as their range, delay, resolution, vendor etc.,
        whereas barometer, light, and temperature describe the measured characteristics of the environment.
        There is no default table: leaving this as None raises a KeyError.

    Returns
    -------
    df_ambient: pd.DataFrame
        A dataframe of ambient sensor data: the selected table joined with the participant's username.

    Raises
    ------
    KeyError
        If sensor is not one of the supported names listed above.
    """
    # Resolve the ORM model first; the query itself has the same shape for every table.
    if sensor == "barometer":
        model = Barometer
    elif sensor == "barometer_sensor":
        model = BarometerSensor
    elif sensor == "light":
        # Note that LightSensor and its light_sensor table are incorrectly named.
        # In this table, we actually find light data, i.e. double_light_lux, the ambient luminance in lux,
        # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests.
        # We do not have light sensor data saved in the database.
        model = LightSensor
    elif sensor == "temperature":
        model = Temperature
    elif sensor == "temperature_sensor":
        model = TemperatureSensor
    else:
        raise KeyError(
            "Specify one of the ambient sensors: "
            "barometer, barometer_sensor, light, temperature, or temperature_sensor."
        )

    # Join the sensor table to participants, then restrict to the requested usernames.
    query_ambient = (
        session.query(model, Participant.username)
        .filter(Participant.id == model.participant_id)
        .filter(Participant.username.in_(usernames))
    )

    with db_engine.connect() as connection:
        df_ambient = pd.read_sql(query_ambient.statement, connection)
    return df_ambient
def clean_pressure(df_ambient: pd.DataFrame, minimum_pressure_mb=None) -> pd.DataFrame:
    """
    Remove rows whose pressure is lower than the minimum plausible pressure.

    Parameters
    ----------
    df_ambient: pd.DataFrame
        A dataframe of barometer data, which includes measured pressure in double_values_0.
    minimum_pressure_mb: float, optional
        The lowest pressure (in mbar) to keep. Defaults to MINIMUM_PRESSURE_MB (lowest measured pressure).

    Returns
    -------
    df_ambient: pd.DataFrame
        A new dataframe holding only the rows with pressure at or above the minimum.
        Rows with a missing (NaN) pressure are removed as well, since they fail the comparison.
        The input dataframe is not modified.

    Raises
    ------
    KeyError
        If the dataframe has no double_values_0 column.
    """
    if "double_values_0" not in df_ambient:
        raise KeyError("The DF does not seem to hold barometer data.")

    # Resolve the default at call time so the module constant stays the single source of truth.
    if minimum_pressure_mb is None:
        minimum_pressure_mb = MINIMUM_PRESSURE_MB

    # Inclusive bound: a reading equal to the minimum is still plausible; only lower values are dropped.
    is_plausible = df_ambient["double_values_0"] >= minimum_pressure_mb
    return df_ambient[is_plausible]