From 6302a0f0d900613808a109d3bb0bfd0e8a1e95e1 Mon Sep 17 00:00:00 2001 From: junos Date: Wed, 13 Oct 2021 16:57:38 +0200 Subject: [PATCH] Merge ambient sensors into one file. Explore barometer sensor data for one phone. --- .../{expl_light.py => expl_ambient.py} | 38 ++++++++++-- features/ambient.py | 62 +++++++++++++++++++ features/light.py | 30 --------- 3 files changed, 95 insertions(+), 35 deletions(-) rename exploration/{expl_light.py => expl_ambient.py} (80%) create mode 100644 features/ambient.py delete mode 100644 features/light.py diff --git a/exploration/expl_light.py b/exploration/expl_ambient.py similarity index 80% rename from exploration/expl_light.py rename to exploration/expl_ambient.py index 65a06b0..2b617ec 100644 --- a/exploration/expl_light.py +++ b/exploration/expl_ambient.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.11.4 +# jupytext_version: 1.12.0 # kernelspec: # display_name: straw2analysis # language: python @@ -32,18 +32,18 @@ import participants.query_db TZ_LJ = timezone("Europe/Ljubljana") # %% -from features.light import * +from features.ambient import * # %% [markdown] -# # Basic characteristics +# # Light # %% -df_light_nokia = get_light_data(["nokia_0000003"]) +df_light_nokia = get_ambient_data(["nokia_0000003"], "light") print(df_light_nokia) # %% participants_inactive_usernames = participants.query_db.get_usernames() -df_light_inactive = get_light_data(participants_inactive_usernames) +df_light_inactive = get_ambient_data(participants_inactive_usernames, "light") # %% df_light_inactive.accuracy.value_counts() @@ -166,3 +166,31 @@ sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005) # Relative variability is homogeneous. # # This means that light data needs to be standardized. Min/max standardization would probably fit best. 
+ +# %% [markdown] +# # Barometer + +# %% [markdown] +# ## Barometer sensor + +# %% +df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor") +df_barometer_sensor_samsung.shape + +# %% [markdown] +# Even though we have many values for this sensor, they are all repeated as seen below. + +# %% +barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list() +barometer_sensor_cols.remove("id") +barometer_sensor_cols.remove("_id") +barometer_sensor_cols.remove("timestamp") +barometer_sensor_cols.remove("device_id") +print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols)) + +# %% [markdown] +# ## Barometer data + +# %% +df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer") +print(df_barometer_sensor_samsung) diff --git a/features/ambient.py b/features/ambient.py new file mode 100644 index 0000000..5128d7d --- /dev/null +++ b/features/ambient.py @@ -0,0 +1,62 @@ +from collections.abc import Collection + +import pandas as pd + +from config.models import Barometer, BarometerSensor, LightSensor, Participant, Temperature, TemperatureSensor +from setup import db_engine, session + + +def get_ambient_data(usernames: Collection, sensor=None) -> pd.DataFrame: + """ + Read the data from any of the ambient sensor tables and return it in a dataframe. + + Parameters + ---------- + usernames: Collection + A list of usernames to put into the WHERE condition. + sensor: str + One of: barometer, barometer_sensor, light, temperature, temperature_sensor. + Here, the _sensor tables describe the phone sensors, such as their range, delay, resolution, vendor etc., + whereas barometer, light, and temperature describe the measured characteristics of the environment. + + Returns + ------- + df_ambient: pd.DataFrame + A dataframe of ambient sensor data. 
+ """ + if sensor == "barometer": + query_ambient = ( + session.query(Barometer, Participant.username) + .filter(Participant.id == Barometer.participant_id) + ) + elif sensor == "barometer_sensor": + query_ambient = ( + session.query(BarometerSensor, Participant.username) + .filter(Participant.id == BarometerSensor.participant_id) + ) + elif sensor == "light": + query_ambient = ( + session.query(LightSensor, Participant.username) + .filter(Participant.id == LightSensor.participant_id) + ) + # Note that LightSensor and its light_sensor table are incorrectly named. + # In this table, we actually find light data, i.e. double_light_lux, the ambient luminance in lux, + # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests. + # We do not have light sensor data saved in the database. + elif sensor == "temperature": + query_ambient = ( + session.query(Temperature, Participant.username) + .filter(Participant.id == Temperature.participant_id) + ) + elif sensor == "temperature_sensor": + query_ambient = ( + session.query(TemperatureSensor, Participant.username) + .filter(Participant.id == TemperatureSensor.participant_id) + ) + else: + raise KeyError("Specify one of the ambient sensors: barometer, barometer_sensor, light, temperature, or temperature_sensor.") + + query_ambient = query_ambient.filter(Participant.username.in_(usernames)) + with db_engine.connect() as connection: + df_ambient = pd.read_sql(query_ambient.statement, connection) + return df_ambient diff --git a/features/light.py b/features/light.py deleted file mode 100644 index 64bc3ae..0000000 --- a/features/light.py +++ /dev/null @@ -1,30 +0,0 @@ -from collections.abc import Collection - -import pandas as pd - -from config.models import LightSensor, Participant -from setup import db_engine, session - - -def get_light_data(usernames: Collection) -> pd.DataFrame: - """ - Read the data from the light sensor table and return it in a dataframe. 
- - Parameters - ---------- - usernames: Collection - A list of usernames to put into the WHERE condition. - - Returns - ------- - df_light: pd.DataFrame - A dataframe of light data. - """ - query_light = ( - session.query(LightSensor, Participant.username) - .filter(Participant.id == LightSensor.participant_id) - .filter(Participant.username.in_(usernames)) - ) - with db_engine.connect() as connection: - df_light = pd.read_sql(query_light.statement, connection) - return df_light