diff --git a/config/models.py b/config/models.py index ba3fb3b..af3b136 100644 --- a/config/models.py +++ b/config/models.py @@ -166,12 +166,43 @@ class Application(Base, AWAREsensor): class Barometer(Base, AWAREsensor): + """ + Contains the barometer sensor data. + + Attributes + ---------- + double_values_0: float + The ambient air pressure in mbar (hPa) + accuracy: int + Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH)) + """ + double_values_0 = Column(Float, nullable=False) accuracy = Column(SmallInteger, nullable=True) label = Column(String, nullable=True) class BarometerSensor(Base, AWAREsensor): + """ + Contains the barometer sensor capabilities. + + Attributes + ---------- + double_sensor_maximum_range: float + Maximum sensor value possible + double_sensor_minimum_delay: float + Minimum sampling delay in microseconds + sensor_name: str + double_sensor_power_ma: float + Sensor’s power drain in mA + double_sensor_resolution: float + Sensor’s resolution in sensor’s units + sensor_type: str + sensor_vendor: str + Sensor’s manufacturer + sensor_version: str + """ + __tablename__ = "barometer_sensor" # Since this table is not really important, # I will leave all columns as nullable. (nullable=True by default.) @@ -257,6 +288,19 @@ class Imperfection(Base): class LightSensor(Base, AWAREsensor): + """ + Contains the light sensor data. + Note: Even though this table is named light_sensor, it actually contains what AWARE calls light data + (rather than the data about the sensor's capabilities). Cf. Barometer(Sensor) and Temperature(Sensor). 
+ + Attributes + ---------- + double_light_lux: float + The ambient luminance in lux units + accuracy: int + Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH)) + """ + __tablename__ = "light_sensor" double_light_lux = Column(Float, nullable=False) accuracy = Column(Integer, nullable=True) @@ -376,12 +420,43 @@ class SMS(Base, AWAREsensor): class Temperature(Base, AWAREsensor): + """ + Contains the temperature sensor data. + + Attributes + ---------- + temperature_celsius: float + Measured temperature in °C + accuracy: int + Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH)) + """ + temperature_celsius = Column(Float, nullable=False) accuracy = Column(SmallInteger, nullable=True) label = Column(String, nullable=True) class TemperatureSensor(Base, AWAREsensor): + """ + Contains the temperature sensor capabilities. + + Attributes + ---------- + double_sensor_maximum_range: float + Maximum sensor value possible + double_sensor_minimum_delay: float + Minimum sampling delay in microseconds + sensor_name: str + double_sensor_power_ma: float + Sensor’s power drain in mA + double_sensor_resolution: float + Sensor’s resolution in sensor’s units + sensor_type: str + sensor_vendor: str + Sensor’s manufacturer + sensor_version: str + """ + # I left all of these nullable, # as we haven't seen any data from this sensor anyway. 
__tablename__ = "temperature_sensor" diff --git a/exploration/expl_light.py b/exploration/expl_ambient.py similarity index 66% rename from exploration/expl_light.py rename to exploration/expl_ambient.py index 65a06b0..8285325 100644 --- a/exploration/expl_light.py +++ b/exploration/expl_ambient.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.11.4 +# jupytext_version: 1.13.0 # kernelspec: # display_name: straw2analysis # language: python @@ -21,7 +21,6 @@ import sys import seaborn as sns from pytz import timezone -from tabulate import tabulate nb_dir = os.path.split(os.getcwd())[0] if nb_dir not in sys.path: @@ -32,18 +31,18 @@ import participants.query_db TZ_LJ = timezone("Europe/Ljubljana") # %% -from features.light import * +from features.ambient import * # %% [markdown] -# # Basic characteristics +# # Light # %% -df_light_nokia = get_light_data(["nokia_0000003"]) +df_light_nokia = get_ambient_data(["nokia_0000003"], "light") print(df_light_nokia) # %% participants_inactive_usernames = participants.query_db.get_usernames() -df_light_inactive = get_light_data(participants_inactive_usernames) +df_light_inactive = get_ambient_data(participants_inactive_usernames, "light") # %% df_light_inactive.accuracy.value_counts() @@ -103,7 +102,7 @@ df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]] # Zeroes are present during the day. It does happens when the sensor is physically blocked. # %% [markdown] -# # Differences between participants +# ## Differences between participants # %% df_light_participants = ( @@ -166,3 +165,74 @@ sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005) # Relative variability is homogeneous. # # This means that light data needs to be standardized. Min/max standardization would probably fit best. 
+ +# %% [markdown] +# # Barometer + +# %% [markdown] +# ## Barometer sensor + +# %% +df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor") +df_barometer_sensor_samsung.shape + +# %% [markdown] +# Even though we have many rows for this sensor, they all repeat the same values, as seen below. + +# %% +barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list() +barometer_sensor_cols.remove("id") +barometer_sensor_cols.remove("_id") +barometer_sensor_cols.remove("timestamp") +barometer_sensor_cols.remove("device_id") +print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols)) + +# %% [markdown] +# ## Barometer data + +# %% +df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer") +print(df_barometer_samsung) + +# %% +df_barometer_inactive = get_ambient_data(participants_inactive_usernames, "barometer") + +# %% +df_barometer_inactive.accuracy.value_counts() + +# %% +df_barometer_inactive.participant_id.nunique() + +# %% +df_barometer_inactive.double_values_0.describe() + +# %% [markdown] +# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure): +# +# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg). + +# %% +df_barometer_inactive[df_barometer_inactive["double_values_0"] < 870] + +# %% +sns.displot( + data=df_barometer_inactive[df_barometer_inactive["double_values_0"] > 870], + x="double_values_0", + binwidth=10, + height=8, +) + +# %% [markdown] +# # Temperature + +# %% [markdown] +# ## Temperature sensor + +# %% [markdown] +# This table is empty. + +# %% [markdown] +# ## Temperature data + +# %% [markdown] +# This table is empty. 
diff --git a/features/ambient.py b/features/ambient.py new file mode 100644 index 0000000..7a9e8e3 --- /dev/null +++ b/features/ambient.py @@ -0,0 +1,91 @@ +from collections.abc import Collection + +import pandas as pd + +from config.models import ( + Barometer, + BarometerSensor, + LightSensor, + Participant, + Temperature, + TemperatureSensor, +) +from setup import db_engine, session + +MINIMUM_PRESSURE_MB = 870 +# The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, +# with a record low of 870 mbar (87 kPa; 26 inHg). + + +def get_ambient_data(usernames: Collection, sensor=None) -> pd.DataFrame: + """ + Read the data from any of the ambient sensor tables and return it in a dataframe. + + Parameters + ---------- + usernames: Collection + A list of usernames to put into the WHERE condition. + sensor: str + One of: barometer, barometer_sensor, light, temperature, temperature_sensor. + Here, the _sensor tables describe the phone sensors, such as their range, delay, resolution, vendor etc., + whereas barometer, light, and temperature describe the measured characteristics of the environment. + + Returns + ------- + df_ambient: pd.DataFrame + A dataframe of ambient sensor data. + """ + if sensor == "barometer": + query_ambient = session.query(Barometer, Participant.username).filter( + Participant.id == Barometer.participant_id + ) + elif sensor == "barometer_sensor": + query_ambient = session.query(BarometerSensor, Participant.username).filter( + Participant.id == BarometerSensor.participant_id + ) + elif sensor == "light": + query_ambient = session.query(LightSensor, Participant.username).filter( + Participant.id == LightSensor.participant_id + ) + # Note that LightSensor and its light_sensor table are incorrectly named. + # In this table, we actually find light data, i.e. double_light_lux, the ambient luminance in lux, + # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests. 
+ # We do not have light sensor data saved in the database. + elif sensor == "temperature": + query_ambient = session.query(Temperature, Participant.username).filter( + Participant.id == Temperature.participant_id + ) + elif sensor == "temperature_sensor": + query_ambient = session.query(TemperatureSensor, Participant.username).filter( + Participant.id == TemperatureSensor.participant_id + ) + else: + raise KeyError( + "Specify one of the ambient sensors: " + "barometer, barometer_sensor, light, temperature, or temperature_sensor." + ) + + query_ambient = query_ambient.filter(Participant.username.in_(usernames)) + with db_engine.connect() as connection: + df_ambient = pd.read_sql(query_ambient.statement, connection) + return df_ambient + + +def clean_pressure(df_ambient: pd.DataFrame) -> pd.DataFrame: + """ + Keeps only rows whose pressure is strictly above MINIMUM_PRESSURE_MB (the record-low sea-level pressure). + + Parameters + ---------- + df_ambient: pd.DataFrame + A dataframe of barometer data, which includes measured pressure in double_values_0. + + Returns + ------- + df_ambient: pd.DataFrame + The dataframe restricted to rows with double_values_0 > MINIMUM_PRESSURE_MB. + """ + if "double_values_0" not in df_ambient: + raise KeyError("The DF does not seem to hold barometer data.") + df_ambient = df_ambient[df_ambient["double_values_0"] > MINIMUM_PRESSURE_MB] + return df_ambient diff --git a/features/light.py b/features/light.py deleted file mode 100644 index 64bc3ae..0000000 --- a/features/light.py +++ /dev/null @@ -1,30 +0,0 @@ -from collections.abc import Collection - -import pandas as pd - -from config.models import LightSensor, Participant -from setup import db_engine, session - - -def get_light_data(usernames: Collection) -> pd.DataFrame: - """ - Read the data from the light sensor table and return it in a dataframe. - - Parameters - ---------- - usernames: Collection - A list of usernames to put into the WHERE condition. 
- - Returns - ------- - df_light: pd.DataFrame - A dataframe of light data. - """ - query_light = ( - session.query(LightSensor, Participant.username) - .filter(Participant.id == LightSensor.participant_id) - .filter(Participant.username.in_(usernames)) - ) - with db_engine.connect() as connection: - df_light = pd.read_sql(query_light.statement, connection) - return df_light