2021-07-23 16:42:16 +02:00
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
2021-10-14 17:59:33 +02:00
# jupytext_version: 1.13.0
2021-07-23 16:42:16 +02:00
# kernelspec:
# display_name: straw2analysis
# language: python
# name: straw2analysis
# ---
# %%
2021-07-23 18:28:02 +02:00
# %matplotlib inline
import datetime
2021-07-23 16:42:16 +02:00
import os
import sys
import seaborn as sns
2021-07-23 18:28:02 +02:00
from pytz import timezone
2021-07-23 16:42:16 +02:00
# Put the parent of the current working directory on the import path so
# that packages living one level above the notebook can be imported.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    # Guard against appending the same entry again when the cell is re-run.
    sys.path.append(nb_dir)
import participants . query_db
2021-07-23 18:28:02 +02:00
# Time zone used to localise timestamps; the key must match the tz database
# name exactly (no surrounding whitespace).
TZ_LJ = timezone("Europe/Ljubljana")
2021-07-23 16:42:16 +02:00
# %%
2021-10-13 16:57:38 +02:00
from features . ambient import *
2021-07-23 16:42:16 +02:00
2021-07-23 18:28:02 +02:00
# %% [markdown]
2021-10-13 16:57:38 +02:00
# # Light
2021-07-23 18:28:02 +02:00
2021-07-23 16:42:16 +02:00
# %%
2021-10-13 16:57:38 +02:00
# Light readings for a single username, used for the spot checks further down.
df_light_nokia = get_ambient_data(["nokia_0000003"], "light")
2021-07-23 16:42:16 +02:00
print(df_light_nokia)
# %%
# NOTE(review): get_usernames() is called with its defaults; the variable
# name suggests these are the inactive participants - confirm in query_db.
participants_inactive_usernames = participants.query_db.get_usernames()
2021-10-13 16:57:38 +02:00
# Light readings for every username fetched above.
df_light_inactive = get_ambient_data(participants_inactive_usernames, "light")
2021-07-23 16:42:16 +02:00
# %%
# How often each value of the accuracy column occurs.
df_light_inactive["accuracy"].value_counts()
2021-07-23 18:28:02 +02:00
# %% [markdown]
# From [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH):
#
# ```java
# public static final int SENSOR_STATUS_ACCURACY_HIGH
# ```
#
# This sensor is reporting data with maximum accuracy
#
# Constant Value: 3 (0x00000003)
2021-07-23 16:42:16 +02:00
# %%
# Summary statistics of the double_light_lux column.
df_light_inactive["double_light_lux"].describe()
# %%
# Work on a copy so the shift below does not alter df_light_inactive.
df_light_plot = df_light_inactive.copy()
# Shift every reading by 1; presumably so that zero readings remain
# plottable on the log-scaled x axis.
df_light_plot["double_light_lux"] = df_light_plot["double_light_lux"] + 1
sns.displot(
    data=df_light_plot,
    x="double_light_lux",
    binwidth=0.1,
    log_scale=(True, False),  # log x axis, linear count axis
    height=8,
)
# %% [markdown]
# The official SensorManager Light constants are:
# * Cloudy sky: 100.0
# * Full moon: 0.25
# * No moon: 0.001
# * Overcast: 10000.0
# * Shade: 20000.0
# * Sunlight: 110000.0
# * Sunlight maximum: 120000.0
# * Sunrise: 400.0
#
# %%
# Zoom in on the low end: keep readings of at most 10 and plot them on a
# linear axis with 0.5-wide bins.
df_light_low = df_light_inactive[df_light_inactive["double_light_lux"] <= 10]
sns.displot(data=df_light_low, x="double_light_lux", binwidth=0.5, height=8)
# %%
2021-07-23 18:28:02 +02:00
# Within the low readings, list which exact values below 0.5 occur and how often.
df_light_very_low = df_light_low[df_light_low["double_light_lux"] < 0.5]
df_light_very_low["double_light_lux"].value_counts()
# %%
# Add a timezone-aware datetime column; the timestamp is divided by 1000
# before conversion, i.e. it is treated as milliseconds since the epoch.
df_light_nokia["datetime_lj"] = df_light_nokia["timestamp"].apply(
    lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
)
# Show the local times at which a reading of exactly zero was recorded.
df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]]
# %% [markdown]
# Zeroes are present during the day. This does happen when the sensor is physically blocked.
2021-07-23 18:50:17 +02:00
# %% [markdown]
2021-10-14 17:59:33 +02:00
# ## Differences between participants
2021-07-23 18:50:17 +02:00
# %%
# Summary statistics of the light readings per (participant_id, device_id).
df_light_participants = (
    df_light_inactive[["participant_id", "device_id", "double_light_lux"]]
    .groupby(["participant_id", "device_id"])
    .agg(["mean", "median", "std", "min", "max"])
    # agg() with a list yields two-level columns (column name, statistic);
    # put the group keys on the second level so they survive the flattening.
    .reset_index(col_level=1)
)
# Flatten the columns to the second level only: group keys and statistic names.
df_light_participants.columns = df_light_participants.columns.get_level_values(1)
# %%
# Groups whose minimum reading is strictly positive, i.e. that never reported zero.
df_light_participants[df_light_participants["min"] > 0]
# %%
# Dimensions (rows, columns) of the data recorded for one specific device.
df_light_inactive[
    df_light_inactive["device_id"] == "3188b03e-8b6f-45da-894e-769eed81bbda"
].shape
# %% [markdown]
# This was a Lenovo Vibe K6, but the small range of values is due to a reinstallation shortly after the first (unsuccessful) installation.
# %%
# Distribution of the per-group means on a log-scaled x axis.
sns.displot(data=df_light_participants, x="mean", binwidth=0.1, log_scale=(True, False))
# %%
# Distribution of the per-group maxima, same binning and axis scaling.
sns.displot(data=df_light_participants, x="max", binwidth=0.1, log_scale=(True, False))
# %% [markdown]
# Variability in means is probably due to variability in maxima.
# %%
# Distribution of the per-group medians on linear axes with 50-wide bins.
histogram_median = sns.displot(
    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
)
# %%
# Groups whose median reading exceeds 10 000.
df_light_participants[df_light_participants["median"] > 1e4]
# %% [markdown]
# This was a Cubot KingKong Mini 2 phone.
# %%
# Same median histogram as before, but with the x axis limited to 0-600 so
# that the bulk of the distribution is readable.
histogram_median = sns.displot(
    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
)
histogram_median.set(xlim=(0, 600))
# %% [markdown]
# Other medians are much more similar.
# %%
# Relative variability: each group's standard deviation as a fraction of
# its own maximum reading.
df_light_participants["std_rel"] = (
    df_light_participants["std"] / df_light_participants["max"]
)
# %%
sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005)
# %% [markdown]
# Relative variability is homogeneous.
#
# This means that light data needs to be standardized. Min/max standardization would probably fit best.
2021-10-13 16:57:38 +02:00
# %% [markdown]
# # Barometer
# %% [markdown]
# ## Barometer sensor
# %%
# Contents of the barometer_sensor table for a single username, and its size.
df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor")
df_barometer_sensor_samsung.shape
# %% [markdown]
# Even though we have many values for this sensor, they are all repeated as seen below.
# %%
# Compare rows on every column except the row identifiers, the timestamp and
# the device id, then print only the distinct rows that remain.
barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list()
for ignored_col in ("id", "_id", "timestamp", "device_id"):
    # list.remove raises ValueError if the column is unexpectedly absent.
    barometer_sensor_cols.remove(ignored_col)
print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols))
2021-10-14 17:59:33 +02:00
# %% [markdown]
# ## Barometer data
# %%
# Barometer readings for a single username, printed for a quick look.
df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer")
print(df_barometer_samsung)
# %%
# Barometer readings for every username in participants_inactive_usernames.
df_barometer_inactive = get_ambient_data(participants_inactive_usernames, "barometer")
# %%
# How often each value of the accuracy column occurs.
df_barometer_inactive["accuracy"].value_counts()
# %%
# Number of distinct participants that have barometer data.
df_barometer_inactive["participant_id"].nunique()
# %%
# Summary statistics of the double_values_0 column.
df_barometer_inactive["double_values_0"].describe()
# %% [markdown]
# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure):
#
# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg).
2021-10-13 16:57:38 +02:00
# %%
2021-10-14 17:59:33 +02:00
# Readings below 870, the record-low sea-level pressure quoted above
# (presumably the column is in mbar - confirm).
df_barometer_inactive[df_barometer_inactive["double_values_0"] < 870]
2021-10-13 16:57:38 +02:00
# %%
2021-10-14 17:59:33 +02:00
# Histogram of the readings above 870, in 10-wide bins.
# NOTE(review): the filter is strict, so a reading of exactly 870 is neither
# listed by the `< 870` cell nor plotted here - confirm that is intended.
sns.displot(
    data=df_barometer_inactive[df_barometer_inactive["double_values_0"] > 870],
    x="double_values_0",
    binwidth=10,
    height=8,
)
# %% [markdown]
# # Temperature data
# %% [markdown]
# ## Temperature sensor
# %% [markdown]
# This table is empty.
# %% [markdown]
# ## Temperature data
# %% [markdown]
# This table is empty.