from collections.abc import Collection
from typing import Optional

import pandas as pd

from config.models import (
    Barometer,
    BarometerSensor,
    LightSensor,
    Participant,
    Temperature,
    TemperatureSensor,
)
from setup import db_engine, session

# The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes,
# with a record low of 870 mbar (87 kPa; 26 inHg).
MINIMUM_PRESSURE_MB = 870

# Maps the ``sensor`` argument of get_ambient_data to the ORM model that is queried.
_AMBIENT_MODELS = {
    "barometer": Barometer,
    "barometer_sensor": BarometerSensor,
    # Note that LightSensor and its light_sensor table are incorrectly named.
    # In this table, we actually find light data, i.e. double_light_lux, the ambient luminance in lux,
    # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests.
    # We do not have light sensor data saved in the database.
    "light": LightSensor,
    "temperature": Temperature,
    "temperature_sensor": TemperatureSensor,
}


def get_ambient_data(usernames: Collection, sensor: Optional[str] = None) -> pd.DataFrame:
    """
    Read the data from any of the ambient sensor tables and return it in a dataframe.

    Parameters
    ----------
    usernames: Collection
        A list of usernames to put into the WHERE condition.
    sensor: str
        One of: barometer, barometer_sensor, light, temperature, temperature_sensor.
        Here, the _sensor tables describe the phone sensors, such as their range, delay, resolution, vendor etc.,
        whereas barometer, light, and temperature describe the measured characteristics of the environment.

    Returns
    -------
    df_ambient: pd.DataFrame
        A dataframe of ambient sensor data.
        It holds the columns of the selected table plus the participant's username.

    Raises
    ------
    KeyError
        If sensor is not one of the supported names (this includes the default, None).
    """
    try:
        model = _AMBIENT_MODELS[sensor]
    except (KeyError, TypeError):
        # TypeError is raised for unhashable arguments; report them the same way as unknown names.
        raise KeyError(
            "Specify one of the ambient sensors: "
            "barometer, barometer_sensor, light, temperature, or temperature_sensor."
        ) from None

    # Join the selected table to participants and keep only the requested usernames.
    query_ambient = (
        session.query(model, Participant.username)
        .filter(Participant.id == model.participant_id)
        .filter(Participant.username.in_(usernames))
    )
    with db_engine.connect() as connection:
        df_ambient = pd.read_sql(query_ambient.statement, connection)
    return df_ambient


def clean_pressure(df_ambient: pd.DataFrame) -> pd.DataFrame:
    """
    Simply removes values lower than MINIMUM_PRESSURE_MB (lowest measured pressure).

    A reading exactly equal to MINIMUM_PRESSURE_MB is kept, since that value has actually been measured.
    Rows with a missing (NaN) pressure are removed as well, because the comparison is False for them.

    Parameters
    ----------
    df_ambient: pd.DataFrame
        A dataframe of barometer data, which includes measured pressure in double_values_0.

    Returns
    -------
    df_ambient: pd.DataFrame
        The same dataframe with rows with low values of pressure removed.

    Raises
    ------
    KeyError
        If the dataframe has no double_values_0 column.
    """
    if "double_values_0" not in df_ambient:
        raise KeyError("The DF does not seem to hold barometer data.")
    # >= rather than >: only values strictly below the record low are implausible.
    is_plausible = df_ambient["double_values_0"] >= MINIMUM_PRESSURE_MB
    return df_ambient[is_plausible]