239 lines
5.8 KiB
Python
239 lines
5.8 KiB
Python
# ---
|
|
# jupyter:
|
|
# jupytext:
|
|
# formats: ipynb,py:percent
|
|
# text_representation:
|
|
# extension: .py
|
|
# format_name: percent
|
|
# format_version: '1.3'
|
|
# jupytext_version: 1.13.0
|
|
# kernelspec:
|
|
# display_name: straw2analysis
|
|
# language: python
|
|
# name: straw2analysis
|
|
# ---
|
|
|
|
# %%
|
|
# %matplotlib inline
|
|
import datetime
|
|
import os
|
|
import sys
|
|
|
|
import seaborn as sns
|
|
from pytz import timezone
|
|
|
|
# Put the parent of the current working directory on the module search path
# so that the project packages imported below can be resolved.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
|
|
|
|
import participants.query_db
|
|
|
|
TZ_LJ = timezone("Europe/Ljubljana")
|
|
|
|
# %%
|
|
from features.ambient import *
|
|
|
|
# %% [markdown]
|
|
# # Light
|
|
|
|
# %%
|
|
# Light readings for the single username "nokia_0000003".
nokia_usernames = ["nokia_0000003"]
df_light_nokia = get_ambient_data(nokia_usernames, "light")
print(df_light_nokia)

# %%
# Light readings for every username returned by the participants database.
participants_inactive_usernames = participants.query_db.get_usernames()
df_light_inactive = get_ambient_data(
    participants_inactive_usernames,
    "light",
)

# %%
# How often each accuracy value occurs.
df_light_inactive["accuracy"].value_counts()
|
|
|
|
# %% [markdown]
|
|
# From [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH):
|
|
#
|
|
# ```java
|
|
# public static final int SENSOR_STATUS_ACCURACY_HIGH
|
|
# ```
|
|
#
|
|
# This sensor is reporting data with maximum accuracy
|
|
#
|
|
# Constant Value: 3 (0x00000003)
|
|
|
|
# %%
|
|
df_light_inactive["double_light_lux"].describe()

# %%
# Shift every reading by 1 so that zero readings stay finite on the
# logarithmic x axis requested below (log of 0 is undefined).
df_light_plot = df_light_inactive.assign(
    double_light_lux=df_light_inactive["double_light_lux"] + 1
)
sns.displot(
    data=df_light_plot,
    x="double_light_lux",
    binwidth=0.1,
    log_scale=(True, False),  # logarithmic x, linear y
    height=8,
)
|
|
|
|
# %% [markdown]
|
|
# The official SensorManager Light constants are:
|
|
# * Cloudy sky: 100.0
|
|
# * Full moon: 0.25
|
|
# * No moon: 0.001
|
|
# * Overcast: 10000.0
|
|
# * Shade: 20000.0
|
|
# * Sunlight: 110000.0
|
|
# * Sunlight maximum: 120000.0
|
|
# * Sunrise: 400.0
|
|
#
|
|
|
|
# %%
|
|
# Zoom in on the dim end: rows whose `double_light_lux` is at most 10.
is_low_light = df_light_inactive["double_light_lux"] <= 10
df_light_low = df_light_inactive.loc[is_low_light]
sns.displot(data=df_light_low, x="double_light_lux", binwidth=0.5, height=8)

# %%
# Tabulate the distinct values among readings strictly below 0.5.
is_very_low_light = df_light_low["double_light_lux"] < 0.5
df_light_very_low = df_light_low.loc[is_very_low_light]
df_light_very_low["double_light_lux"].value_counts()
|
|
|
|
# %%
|
|
def _timestamp_ms_to_lj(timestamp_ms):
    """Convert a timestamp given in milliseconds to a datetime in TZ_LJ."""
    # fromtimestamp() expects seconds, hence the division by 1000.
    return datetime.datetime.fromtimestamp(timestamp_ms / 1000.0, tz=TZ_LJ)


df_light_nokia["datetime_lj"] = df_light_nokia["timestamp"].apply(_timestamp_ms_to_lj)

# Local date and time of every reading that is exactly zero.
is_zero_light = df_light_nokia["double_light_lux"] == 0
df_light_nokia.loc[is_zero_light, ["datetime_lj"]]
|
|
|
|
# %% [markdown]
|
|
# Zeroes are present during the day. This does happen when the sensor is physically blocked.
|
|
|
|
# %% [markdown]
|
|
# ## Differences between participants
|
|
|
|
# %%
|
|
# Summary statistics of the light readings for each (participant, device) pair.
# Selecting the single value column before aggregating yields flat column
# names ("mean", "median", ...) directly, so no MultiIndex has to be flattened.
light_summary_stats = ["mean", "median", "std", "min", "max"]
df_light_participants = (
    df_light_inactive.groupby(["participant_id", "device_id"])["double_light_lux"]
    .agg(light_summary_stats)
    .reset_index()
)
|
|
|
|
# %%
|
|
# Groups whose smallest recorded reading is above zero.
has_positive_min = df_light_participants["min"] > 0
df_light_participants.loc[has_positive_min]

# %%
# Shape of the subset of rows belonging to one specific device.
inspected_device_id = "3188b03e-8b6f-45da-894e-769eed81bbda"
df_light_inactive.loc[df_light_inactive["device_id"] == inspected_device_id].shape
|
|
|
|
# %% [markdown]
|
|
# This was a Lenovo Vibe K6, but the small range of values is due to a reinstallation shortly after the first (unsuccessful) installation.
|
|
|
|
# %%
|
|
# Distribution of the per-group means, logarithmic x axis.
sns.displot(
    data=df_light_participants,
    x="mean",
    binwidth=0.1,
    log_scale=(True, False),
)

# %%
# Distribution of the per-group maxima, logarithmic x axis.
sns.displot(
    data=df_light_participants,
    x="max",
    binwidth=0.1,
    log_scale=(True, False),
)
|
|
|
|
# %% [markdown]
|
|
# Variability in means is probably due to variability in maxima.
|
|
|
|
# %%
|
|
# Distribution of the per-group medians on linear axes, over the full range.
histogram_median = sns.displot(
    data=df_light_participants,
    x="median",
    binwidth=50,
    log_scale=(False, False),
)
|
|
|
|
# %%
|
|
# Groups whose median reading exceeds 10 000.
df_light_participants.loc[df_light_participants["median"] > 1e4]
|
|
|
|
# %% [markdown]
|
|
# This was a Cubot KingKong Mini 2 phone.
|
|
|
|
# %%
|
|
# Same histogram of medians as before, redrawn so the x axis can be limited.
histogram_median = sns.displot(
    data=df_light_participants,
    x="median",
    binwidth=50,
    log_scale=(False, False),
)
# Restrict the x axis to 0-600 to leave out the very large medians.
histogram_median.set(xlim=(0, 600))
|
|
|
|
# %% [markdown]
|
|
# Other medians are much more similar.
|
|
|
|
# %%
|
|
# Standard deviation expressed as a fraction of each group's maximum.
std_over_max = df_light_participants["std"] / df_light_participants["max"]
df_light_participants["std_rel"] = std_over_max

# %%
sns.displot(
    data=df_light_participants,
    x="std_rel",
    binwidth=0.005,
)
|
|
|
|
# %% [markdown]
|
|
# Relative variability is homogeneous.
|
|
#
|
|
# This means that light data needs to be standardized. Min/max standardization would probably fit best.
|
|
|
|
# %% [markdown]
|
|
# # Barometer
|
|
|
|
# %% [markdown]
|
|
# ## Barometer sensor
|
|
|
|
# %%
|
|
# Barometer sensor table for the single username "samsung_0000002".
samsung_usernames = ["samsung_0000002"]
df_barometer_sensor_samsung = get_ambient_data(samsung_usernames, "barometer_sensor")
df_barometer_sensor_samsung.shape
|
|
|
|
# %% [markdown]
|
|
# Even though we have many values for this sensor, they are all repeated as seen below.
|
|
|
|
# %%
|
|
# Look for duplicated rows while ignoring the row identifiers, the timestamp
# and the device id; list.remove() raises ValueError if a column is absent.
barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list()
for ignored_col in ("id", "_id", "timestamp", "device_id"):
    barometer_sensor_cols.remove(ignored_col)
print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols))
|
|
|
|
# %% [markdown]
|
|
# ## Barometer data
|
|
|
|
# %%
|
|
# Barometer readings for the single username "samsung_0000002".
df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer")
print(df_barometer_samsung)

# %%
# Barometer readings for all usernames fetched earlier in the notebook.
df_barometer_inactive = get_ambient_data(
    participants_inactive_usernames,
    "barometer",
)

# %%
# How often each accuracy value occurs.
df_barometer_inactive["accuracy"].value_counts()

# %%
# Number of distinct participants with barometer data.
df_barometer_inactive["participant_id"].nunique()

# %%
df_barometer_inactive["double_values_0"].describe()
|
|
|
|
# %% [markdown]
|
|
# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure):
|
|
#
|
|
# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg).
|
|
|
|
# %%
|
|
# Record-low sea-level pressure quoted from Wikipedia above (870 mbar).
# NOTE(review): assumes `double_values_0` is reported in mbar/hPa - confirm.
MIN_PLAUSIBLE_PRESSURE = 870

# Readings below the record low, i.e. physically implausible values.
df_barometer_inactive[df_barometer_inactive["double_values_0"] < MIN_PLAUSIBLE_PRESSURE]

# %%
# Plot the exact complement of the filter above. Using >= (rather than >)
# keeps a reading of exactly 870, which would otherwise appear in neither view.
sns.displot(
    data=df_barometer_inactive[
        df_barometer_inactive["double_values_0"] >= MIN_PLAUSIBLE_PRESSURE
    ],
    x="double_values_0",
    binwidth=10,
    height=8,
)
|
|
|
|
# %% [markdown]
|
|
# # Temperature data
|
|
|
|
# %% [markdown]
|
|
# ## Temperature sensor
|
|
|
|
# %% [markdown]
|
|
# This table is empty.
|
|
|
|
# %% [markdown]
|
|
# ## Temperature data
|
|
|
|
# %% [markdown]
|
|
# This table is empty.
|