Merge branch 'ambient' into ml_pipeline

rapids
junos 2021-11-17 10:39:55 +01:00
commit d34c2ec5e9
4 changed files with 243 additions and 37 deletions

View File

@ -166,12 +166,43 @@ class Application(Base, AWAREsensor):
class Barometer(Base, AWAREsensor):
"""
Contains the barometer sensor data.
Attributes
----------
double_values_0: float
The ambient air pressure in mbar (hPa)
accuracy: int
Sensors accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
"""
double_values_0 = Column(Float, nullable=False)
accuracy = Column(SmallInteger, nullable=True)
label = Column(String, nullable=True)
class BarometerSensor(Base, AWAREsensor):
"""
Contains the barometer sensor capabilities.
Attributes
----------
double_sensor_maximum_range: float
Maximum sensor value possible
double_sensor_minimum_delay: float
Minimum sampling delay in microseconds
sensor_name: str
double_sensor_power_ma: float
Sensors power drain in mA
double_sensor_resolution: float
Sensors resolution in sensors units
sensor_type: str
sensor_vendor: str
Sensors manufacturer
sensor_version: str
"""
__tablename__ = "barometer_sensor"
# Since this table is not really important,
# I will leave all columns as nullable. (nullable=True by default.)
@ -257,6 +288,19 @@ class Imperfection(Base):
class LightSensor(Base, AWAREsensor):
"""
Contains the light sensor data.
Note: Even though this table is named light_sensor, it actually contains what AWARE calls light data
(rather than the data about the sensor's capabilities). Cf. Barometer(Sensor) and Temperature(Sensor).
Attributes
----------
double_light_lux: float
The ambient luminance in lux units
accuracy: int
Sensors accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
"""
__tablename__ = "light_sensor"
double_light_lux = Column(Float, nullable=False)
accuracy = Column(Integer, nullable=True)
@ -376,12 +420,43 @@ class SMS(Base, AWAREsensor):
class Temperature(Base, AWAREsensor):
"""
Contains the temperature sensor data.
Attributes
----------
temperature_celsius: float
Measured temperature in °C
accuracy: int
Sensors accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
"""
temperature_celsius = Column(Float, nullable=False)
accuracy = Column(SmallInteger, nullable=True)
label = Column(String, nullable=True)
class TemperatureSensor(Base, AWAREsensor):
"""
Contains the temperature sensor capabilities.
Attributes
----------
double_sensor_maximum_range: float
Maximum sensor value possible
double_sensor_minimum_delay: float
Minimum sampling delay in microseconds
sensor_name: str
double_sensor_power_ma: float
Sensors power drain in mA
double_sensor_resolution: float
Sensors resolution in sensors units
sensor_type: str
sensor_vendor: str
Sensors manufacturer
sensor_version: str
"""
# I left all of these nullable,
# as we haven't seen any data from this sensor anyway.
__tablename__ = "temperature_sensor"

View File

@ -6,7 +6,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.11.4
# jupytext_version: 1.13.0
# kernelspec:
# display_name: straw2analysis
# language: python
@ -21,7 +21,6 @@ import sys
import seaborn as sns
from pytz import timezone
from tabulate import tabulate
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
@ -32,18 +31,18 @@ import participants.query_db
TZ_LJ = timezone("Europe/Ljubljana")
# %%
from features.light import *
from features.ambient import *
# %% [markdown]
# # Basic characteristics
# # Light
# %%
df_light_nokia = get_light_data(["nokia_0000003"])
df_light_nokia = get_ambient_data(["nokia_0000003"], "light")
print(df_light_nokia)
# %%
participants_inactive_usernames = participants.query_db.get_usernames()
df_light_inactive = get_light_data(participants_inactive_usernames)
df_light_inactive = get_ambient_data(participants_inactive_usernames, "light")
# %%
df_light_inactive.accuracy.value_counts()
@ -103,7 +102,7 @@ df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]]
# Zeroes are present during the day. This happens when the sensor is physically blocked.
# %% [markdown]
# # Differences between participants
# ## Differences between participants
# %%
df_light_participants = (
@ -166,3 +165,74 @@ sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005)
# Relative variability is homogeneous.
#
# This means that light data needs to be standardized. Min/max standardization would probably fit best.
# %% [markdown]
# # Barometer
# %% [markdown]
# ## Barometer sensor
# %%
df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor")
df_barometer_sensor_samsung.shape
# %% [markdown]
# Even though we have many values for this sensor, they are all repeated as seen below.
# %%
# Compare rows only on the columns that describe the sensor itself: take every
# column of the dataframe and drop "id", "_id", "timestamp" and "device_id"
# from the comparison subset, so rows differing only there count as duplicates.
barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list()
for _ignored_col in ("id", "_id", "timestamp", "device_id"):
    barometer_sensor_cols.remove(_ignored_col)
print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols))
# %% [markdown]
# ## Barometer data
# %%
df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer")
print(df_barometer_samsung)
# %%
df_barometer_inactive = get_ambient_data(participants_inactive_usernames, "barometer")
# %%
df_barometer_inactive.accuracy.value_counts()
# %%
df_barometer_inactive.participant_id.nunique()
# %%
df_barometer_inactive.double_values_0.describe()
# %% [markdown]
# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure):
#
# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg).
# %%
df_barometer_inactive[df_barometer_inactive["double_values_0"] < 870]
# %%
sns.displot(
data=df_barometer_inactive[df_barometer_inactive["double_values_0"] > 870],
x="double_values_0",
binwidth=10,
height=8,
)
# %% [markdown]
# # Temperature
# %% [markdown]
# ## Temperature sensor
# %% [markdown]
# This table is empty.
# %% [markdown]
# ## Temperature data
# %% [markdown]
# This table is empty.

View File

@ -0,0 +1,91 @@
from collections.abc import Collection
import pandas as pd
from config.models import (
Barometer,
BarometerSensor,
LightSensor,
Participant,
Temperature,
TemperatureSensor,
)
from setup import db_engine, session
MINIMUM_PRESSURE_MB = 870
# The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes,
# with a record low of 870 mbar (87 kPa; 26 inHg).
def get_ambient_data(usernames: Collection, sensor=None) -> pd.DataFrame:
    """
    Load one of the ambient sensor tables for the given participants into a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames to match in the WHERE ... IN condition on Participant.username.
    sensor: str
        One of: barometer, barometer_sensor, light, temperature, temperature_sensor.
        The _sensor tables describe the phone sensors themselves
        (their range, delay, resolution, vendor etc.), whereas barometer, light,
        and temperature hold the measured characteristics of the environment.

    Returns
    -------
    df_ambient: pd.DataFrame
        The rows of the selected table together with the matching participant's username.

    Raises
    ------
    KeyError
        If sensor is not one of the names listed above (this includes the default, None).
    """
    sensor_names = (
        "barometer",
        "barometer_sensor",
        "light",
        "temperature",
        "temperature_sensor",
    )
    # Reject unknown names before anything database-related is touched.
    if sensor not in sensor_names:
        raise KeyError(
            "Specify one of the ambient sensors: "
            "barometer, barometer_sensor, light, temperature, or temperature_sensor."
        )

    # Note that LightSensor and its light_sensor table are incorrectly named.
    # That table actually holds light data, i.e. double_light_lux, the ambient luminance in lux,
    # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests.
    # We do not have light sensor data saved in the database.
    model_by_sensor = {
        "barometer": Barometer,
        "barometer_sensor": BarometerSensor,
        "light": LightSensor,
        "temperature": Temperature,
        "temperature_sensor": TemperatureSensor,
    }
    model = model_by_sensor[sensor]

    query_ambient = (
        session.query(model, Participant.username)
        .filter(Participant.id == model.participant_id)
        .filter(Participant.username.in_(usernames))
    )
    with db_engine.connect() as connection:
        return pd.read_sql(query_ambient.statement, connection)
def clean_pressure(df_ambient: pd.DataFrame, minimum_pressure=None) -> pd.DataFrame:
    """
    Remove rows whose pressure is lower than a minimum plausible value.

    Parameters
    ----------
    df_ambient: pd.DataFrame
        A dataframe of barometer data, which includes measured pressure in double_values_0.
    minimum_pressure: float, optional
        The lowest pressure to keep. Defaults to MINIMUM_PRESSURE_MB
        (the lowest measured sea-level pressure) when not given.

    Returns
    -------
    df_ambient: pd.DataFrame
        A dataframe holding only the rows whose double_values_0 is at least the minimum.
        The input dataframe is not modified.

    Raises
    ------
    KeyError
        If df_ambient has no double_values_0 column.
    """
    if "double_values_0" not in df_ambient:
        raise KeyError("The DF does not seem to hold barometer data.")
    if minimum_pressure is None:
        minimum_pressure = MINIMUM_PRESSURE_MB
    # ">=" rather than ">": only values *lower than* the minimum are implausible,
    # a reading equal to the record low is still a valid measurement.
    plausible = df_ambient["double_values_0"] >= minimum_pressure
    return df_ambient[plausible]

View File

@ -1,30 +0,0 @@
from collections.abc import Collection
import pandas as pd
from config.models import LightSensor, Participant
from setup import db_engine, session
def get_light_data(usernames: Collection) -> pd.DataFrame:
    """
    Load the light sensor table for the given participants into a dataframe.

    Parameters
    ----------
    usernames: Collection
        Usernames to match in the WHERE ... IN condition on Participant.username.

    Returns
    -------
    df_light: pd.DataFrame
        The LightSensor rows together with the matching participant's username.
    """
    # Pair every light row with its participant, then keep the requested usernames only.
    light_rows = session.query(LightSensor, Participant.username)
    light_rows = light_rows.filter(Participant.id == LightSensor.participant_id)
    light_rows = light_rows.filter(Participant.username.in_(usernames))

    with db_engine.connect() as connection:
        return pd.read_sql(light_rows.statement, connection)