Add a demo of pipeline.

Merge branch 'ambient' into ml_pipeline
Remove low values of pressure.
2021-11-17 10:44:49 +01:00 · 2021-11-17 10:39:55 +01:00 · 2021-10-22 18:09:17 +02:00 · 2021-10-14 17:59:33 +02:00 · 2021-10-13 16:57:38 +02:00
7 changed files with 739 additions and 47 deletions
--- a/config/models.py
+++ b/config/models.py
@ -166,12 +166,43 @@ class Application(Base, AWAREsensor):


 class Barometer(Base, AWAREsensor):
+    """
+    Contains the barometer sensor data.
+
+    Attributes
+    ----------
+    double_values_0: float
+        The ambient air pressure in mbar (hPa)
+    accuracy: int
+        Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
+    """
+
    double_values_0 = Column(Float, nullable=False)
    accuracy = Column(SmallInteger, nullable=True)
    label = Column(String, nullable=True)


 class BarometerSensor(Base, AWAREsensor):
+    """
+    Contains the barometer sensor capabilities.
+
+    Attributes
+    ----------
+    double_sensor_maximum_range: float
+        Maximum sensor value possible
+    double_sensor_minimum_delay: float
+        Minimum sampling delay in microseconds
+    sensor_name: str
+    double_sensor_power_ma: float
+        Sensor’s power drain in mA
+    double_sensor_resolution: float
+        Sensor’s resolution in sensor’s units
+    sensor_type: str
+    sensor_vendor: str
+        Sensor’s manufacturer
+    sensor_version: str
+    """
+
    __tablename__ = "barometer_sensor"
    # Since this table is not really important,
    # I will leave all columns as nullable. (nullable=True by default.)
@ -257,6 +288,19 @@ class Imperfection(Base):


 class LightSensor(Base, AWAREsensor):
+    """
+    Contains the light sensor data.
+    Note: Even though this table is named light_sensor, it actually contains what AWARE calls light data
+        (rather than the data about the sensor's capabilities). Cf. Barometer(Sensor) and Temperature(Sensor).
+
+    Attributes
+    ----------
+    double_light_lux: float
+        The ambient luminance in lux units
+    accuracy: int
+        Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
+    """
+
    __tablename__ = "light_sensor"
    double_light_lux = Column(Float, nullable=False)
    accuracy = Column(Integer, nullable=True)
@ -376,12 +420,43 @@ class SMS(Base, AWAREsensor):


 class Temperature(Base, AWAREsensor):
+    """
+    Contains the temperature sensor data.
+
+    Attributes
+    ----------
+    temperature_celsius: float
+        Measured temperature in °C
+    accuracy: int
+        Sensor’s accuracy level, either 1, 2, or 3 (see [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH))
+    """
+
    temperature_celsius = Column(Float, nullable=False)
    accuracy = Column(SmallInteger, nullable=True)
    label = Column(String, nullable=True)


 class TemperatureSensor(Base, AWAREsensor):
+    """
+    Contains the temperature sensor capabilities.
+
+    Attributes
+    ----------
+    double_sensor_maximum_range: float
+        Maximum sensor value possible
+    double_sensor_minimum_delay: float
+        Minimum sampling delay in microseconds
+    sensor_name: str
+    double_sensor_power_ma: float
+        Sensor’s power drain in mA
+    double_sensor_resolution: float
+        Sensor’s resolution in sensor’s units
+    sensor_type: str
+    sensor_vendor: str
+        Sensor’s manufacturer
+    sensor_version: str
+    """
+
    # I left all of these nullable,
    # as we haven't seen any data from this sensor anyway.
    __tablename__ = "temperature_sensor"
--- a/exploration/ex_ml_pipeline.py
+++ b/exploration/ex_ml_pipeline.py
@ -99,9 +99,7 @@ df_esm_PANAS_daily_means = (


 # %%
-df_proximity_daily_counts = proximity.count_proximity(
-    df_proximity, ["date_lj"]
-)
+df_proximity_daily_counts = proximity.count_proximity(df_proximity, ["date_lj"])

 # %%
 df_proximity_daily_counts
--- a/exploration/expl_ambient.py
+++ b/exploration/expl_ambient.py
@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.11.4
+#       jupytext_version: 1.13.0
 #   kernelspec:
 #     display_name: straw2analysis
 #     language: python
@ -21,7 +21,6 @@ import sys

 import seaborn as sns
 from pytz import timezone
-from tabulate import tabulate

 nb_dir = os.path.split(os.getcwd())[0]
 if nb_dir not in sys.path:
@ -32,18 +31,18 @@ import participants.query_db
 TZ_LJ = timezone("Europe/Ljubljana")

 # %%
-from features.light import *
+from features.ambient import *

 # %% [markdown]
-# # Basic characteristics
+# # Light

 # %%
-df_light_nokia = get_light_data(["nokia_0000003"])
+df_light_nokia = get_ambient_data(["nokia_0000003"], "light")
 print(df_light_nokia)

 # %%
 participants_inactive_usernames = participants.query_db.get_usernames()
-df_light_inactive = get_light_data(participants_inactive_usernames)
+df_light_inactive = get_ambient_data(participants_inactive_usernames, "light")

 # %%
 df_light_inactive.accuracy.value_counts()
@ -103,7 +102,7 @@ df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]]
 # Zeroes are present during the day. It does happens when the sensor is physically blocked.

 # %% [markdown]
-# # Differences between participants
+# ## Differences between participants

 # %%
 df_light_participants = (
@ -166,3 +165,74 @@ sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005)
 # Relative variability is homogeneous.
 #
 # This means that light data needs to be standardized. Min/max standardization would probably fit best.
+
+# %% [markdown]
+# # Barometer
+
+# %% [markdown]
+# ## Barometer sensor
+
+# %%
+df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor")
+df_barometer_sensor_samsung.shape
+
+# %% [markdown]
+# Even though we have many values for this sensor, they are all repeated as seen below.
+
+# %%
+barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list()
+barometer_sensor_cols.remove("id")
+barometer_sensor_cols.remove("_id")
+barometer_sensor_cols.remove("timestamp")
+barometer_sensor_cols.remove("device_id")
+print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols))
+
+# %% [markdown]
+# ## Barometer data
+
+# %%
+df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer")
+print(df_barometer_samsung)
+
+# %%
+df_barometer_inactive = get_ambient_data(participants_inactive_usernames, "barometer")
+
+# %%
+df_barometer_inactive.accuracy.value_counts()
+
+# %%
+df_barometer_inactive.participant_id.nunique()
+
+# %%
+df_barometer_inactive.double_values_0.describe()
+
+# %% [markdown]
+# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure):
+#
+# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg).
+
+# %%
+df_barometer_inactive[df_barometer_inactive["double_values_0"] < 870]
+
+# %%
+sns.displot(
+    data=df_barometer_inactive[df_barometer_inactive["double_values_0"] > 870],
+    x="double_values_0",
+    binwidth=10,
+    height=8,
+)
+
+# %% [markdown]
+# # Temperature
+
+# %% [markdown]
+# ## Temperature sensor
+
+# %% [markdown]
+# This table is empty.
+
+# %% [markdown]
+# ## Temperature data
+
+# %% [markdown]
+# This table is empty.
--- a/features/ambient.py
+++ b/features/ambient.py
@ -0,0 +1,91 @@
+from collections.abc import Collection
+
+import pandas as pd
+
+from config.models import (
+    Barometer,
+    BarometerSensor,
+    LightSensor,
+    Participant,
+    Temperature,
+    TemperatureSensor,
+)
+from setup import db_engine, session
+
+MINIMUM_PRESSURE_MB = 870
+# The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes,
+# with a record low of 870 mbar (87 kPa; 26 inHg).
+
+
+def get_ambient_data(usernames: Collection, sensor=None) -> pd.DataFrame:
+    """
+    Read the data from any of the ambient sensor tables and return it in a dataframe.
+
+    Parameters
+    ----------
+    usernames: Collection
+        A list of usernames to put into the WHERE condition.
+    sensor: str
+        One of: barometer, barometer_sensor, light, temperature, temperature_sensor.
+        Here, the _sensor tables describe the phone sensors, such as their range, delay, resolution, vendor etc.,
+        whereas barometer, light, and temperature describe the measured characteristics of the environment.
+
+    Returns
+    -------
+    df_ambient: pd.DataFrame
+        A dataframe of ambient sensor data.
+    """
+    if sensor == "barometer":
+        query_ambient = session.query(Barometer, Participant.username).filter(
+            Participant.id == Barometer.participant_id
+        )
+    elif sensor == "barometer_sensor":
+        query_ambient = session.query(BarometerSensor, Participant.username).filter(
+            Participant.id == BarometerSensor.participant_id
+        )
+    elif sensor == "light":
+        query_ambient = session.query(LightSensor, Participant.username).filter(
+            Participant.id == LightSensor.participant_id
+        )
+    # Note that LightSensor and its light_sensor table are incorrectly named.
+    # In this table, we actually find light data, i.e. double_light_lux, the ambient luminance in lux,
+    # and NOT light sensor data (its range, delay, resolution, vendor etc.) as the name suggests.
+    # We do not have light sensor data saved in the database.
+    elif sensor == "temperature":
+        query_ambient = session.query(Temperature, Participant.username).filter(
+            Participant.id == Temperature.participant_id
+        )
+    elif sensor == "temperature_sensor":
+        query_ambient = session.query(TemperatureSensor, Participant.username).filter(
+            Participant.id == TemperatureSensor.participant_id
+        )
+    else:
+        raise KeyError(
+            "Specify one of the ambient sensors: "
+            "barometer, barometer_sensor, light, temperature, or temperature_sensor."
+        )
+
+    query_ambient = query_ambient.filter(Participant.username.in_(usernames))
+    with db_engine.connect() as connection:
+        df_ambient = pd.read_sql(query_ambient.statement, connection)
+    return df_ambient
+
+
+def clean_pressure(df_ambient: pd.DataFrame) -> pd.DataFrame:
+    """
+    Simply removes values lower than or equal to MINIMUM_PRESSURE_MB (lowest measured pressure).
+
+    Parameters
+    ----------
+    df_ambient: pd.DataFrame
+        A dataframe of barometer data, which includes measured pressure in double_values_0.
+
+    Returns
+    -------
+    df_ambient: pd.DataFrame
+        The same dataframe with rows with low values of pressure removed.
+    """
+    if "double_values_0" not in df_ambient:
+        raise KeyError("The DF does not seem to hold barometer data.")
+    df_ambient = df_ambient[df_ambient["double_values_0"] > MINIMUM_PRESSURE_MB]
+    return df_ambient
--- a/features/light.py
+++ b/features/light.py
@ -1,30 +0,0 @@
-from collections.abc import Collection
-
-import pandas as pd
-
-from config.models import LightSensor, Participant
-from setup import db_engine, session
-
-
-def get_light_data(usernames: Collection) -> pd.DataFrame:
-    """
-    Read the data from the light sensor table and return it in a dataframe.
-
-    Parameters
-    ----------
-    usernames: Collection
-        A list of usernames to put into the WHERE condition.
-
-    Returns
-    -------
-    df_light: pd.DataFrame
-        A dataframe of light data.
-    """
-    query_light = (
-        session.query(LightSensor, Participant.username)
-        .filter(Participant.id == LightSensor.participant_id)
-        .filter(Participant.username.in_(usernames))
-    )
-    with db_engine.connect() as connection:
-        df_light = pd.read_sql(query_light.statement, connection)
-    return df_light
--- a/machine_learning/prox_comm_PANAS_nb.ipynb
+++ b/machine_learning/prox_comm_PANAS_nb.ipynb
--- a/statistical_analysis/adherence.py
+++ b/statistical_analysis/adherence.py
@ -96,13 +96,23 @@ df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocesse
 # Sessions are now classified according to the type of a session (a true questionnaire or simple single questions) and users response.

 # %%
-df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response"].astype("category")
-df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.remove_categories(['during_work_first', 'ema_unanswered', 'evening_first', 'morning', 'morning_first'])
-df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.add_categories("interrupted")
-df_session_counts_time.loc[df_session_counts_time["session_response_cat"].isna(), "session_response_cat"] = "interrupted"
-#df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({
-#    "ema_unanswered": "interrupted", 
-#    "morning_first": "interrupted", 
+df_session_counts_time["session_response_cat"] = df_session_counts_time[
+    "session_response"
+].astype("category")
+df_session_counts_time["session_response_cat"] = df_session_counts_time[
+    "session_response_cat"
+].cat.remove_categories(
+    ["during_work_first", "ema_unanswered", "evening_first", "morning", "morning_first"]
+)
+df_session_counts_time["session_response_cat"] = df_session_counts_time[
+    "session_response_cat"
+].cat.add_categories("interrupted")
+df_session_counts_time.loc[
+    df_session_counts_time["session_response_cat"].isna(), "session_response_cat"
+] = "interrupted"
+# df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({
+#    "ema_unanswered": "interrupted",
+#    "morning_first": "interrupted",
 #    "evening_first": "interrupted",
 #    "morning": "interrupted",
 #    "during_work_first": "interrupted"})
Author	SHA1	Message	Date
junos	e33a49c9fc	Add a demo of pipeline.	2021-11-17 10:44:49 +01:00
junos	d34c2ec5e9	Merge branch 'ambient' into ml_pipeline	2021-11-17 10:39:55 +01:00
junos	6fc0d962ae	Remove low values of pressure.	2021-10-22 18:09:17 +02:00
junos	92fbda242b	Explore barometer and temperature data. Add docstrings to models.	2021-10-14 17:59:33 +02:00
junos	6302a0f0d9	Merge ambient sensors into one file. Explore barometer sensor data for one phone.	2021-10-13 16:57:38 +02:00