# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.13.0
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

# %%
# %matplotlib inline
import datetime
import os
import sys

import seaborn as sns
from pytz import timezone

# Put the parent of the current working directory on the import path; the
# project imports below must therefore stay *after* this statement.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import participants.query_db

# Time zone used when converting epoch timestamps to local wall-clock time.
TZ_LJ = timezone("Europe/Ljubljana")

# %%
# NOTE(review): the cells visible in this notebook only use
# `get_ambient_data` from this module; an explicit import would be clearer
# if nothing else relies on the wildcard - confirm before narrowing.
from features.ambient import *

# %% [markdown]
# # Light

# %%
df_light_nokia = get_ambient_data(["nokia_0000003"], "light")
print(df_light_nokia)

# %%
# NOTE(review): the variable name suggests `get_usernames()` returns inactive
# participants by default - confirm against participants.query_db.
participants_inactive_usernames = participants.query_db.get_usernames()
df_light_inactive = get_ambient_data(participants_inactive_usernames, "light")

# %%
df_light_inactive.accuracy.value_counts()

# %% [markdown]
# From [SensorManager](https://developer.android.com/reference/android/hardware/SensorManager.html#SENSOR_STATUS_ACCURACY_HIGH):
#
# ```java
# public static final int SENSOR_STATUS_ACCURACY_HIGH
# ```
#
# This sensor is reporting data with maximum accuracy
#
# Constant Value: 3 (0x00000003)

# %%
df_light_inactive.double_light_lux.describe()

# %%
# Plot lux + 1 on a logarithmic x axis; the shift is presumably there so that
# zero readings remain representable on the log scale.
df_light_plot = df_light_inactive.assign(
    double_light_lux=df_light_inactive["double_light_lux"] + 1
)
sns.displot(
    data=df_light_plot,
    x="double_light_lux",
    binwidth=0.1,
    log_scale=(True, False),
    height=8,
)

# %% [markdown]
# The official SensorManager Light constants are:
# * Cloudy sky: 100.0
# * Full moon: 0.25
# * No moon: 0.001
# * Overcast: 10000.0
# * Shade: 20000.0
# * Sunlight: 110000.0
# * Sunlight maximum: 120000.0
# * Sunrise: 400.0
#

# %%
# Zoom in on the low end of the distribution (at most 10 lux).
df_light_low = df_light_inactive[df_light_inactive["double_light_lux"] <= 10]
sns.displot(data=df_light_low, x="double_light_lux", binwidth=0.5, height=8)

# %%
# Readings below 0.5 lux, i.e. the first bin of the histogram above.
df_light_very_low = df_light_low[df_light_low["double_light_lux"] < 0.5]
df_light_very_low.double_light_lux.value_counts()

# %%
# `timestamp` is divided by 1000 before conversion, i.e. it is treated as
# milliseconds since the epoch; the result is a tz-aware local datetime.
df_light_nokia["datetime_lj"] = df_light_nokia["timestamp"].apply(
    lambda x: datetime.datetime.fromtimestamp(x / 1000.0, tz=TZ_LJ)
)
# Local times at which the sensor reported exactly zero lux.
df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]]

# %% [markdown]
# Zeroes are present during the day. This happens when the sensor is physically blocked.

# %% [markdown]
# ## Differences between participants

# %%
# One row per (participant, device) with summary statistics of the lux values.
# Aggregating the selected column directly yields flat column names
# (participant_id, device_id, mean, median, std, min, max).
df_light_participants = (
    df_light_inactive.groupby(["participant_id", "device_id"])["double_light_lux"]
    .agg(["mean", "median", "std", "min", "max"])
    .reset_index()
)

# %%
# Devices that never reported a zero reading.
df_light_participants[df_light_participants["min"] > 0]

# %%
df_light_inactive[
    df_light_inactive["device_id"] == "3188b03e-8b6f-45da-894e-769eed81bbda"
].shape

# %% [markdown]
# This was a Lenovo Vibe K6, but the small range of values is due to a reinstallation shortly after the first (unsuccessful) installation.

# %%
sns.displot(data=df_light_participants, x="mean", binwidth=0.1, log_scale=(True, False))

# %%
sns.displot(data=df_light_participants, x="max", binwidth=0.1, log_scale=(True, False))

# %% [markdown]
# Variability in means is probably due to variability in maxima.

# %%
histogram_median = sns.displot(
    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
)

# %%
df_light_participants[df_light_participants["median"] > 1e4]

# %% [markdown]
# This was a Cubot KingKong Mini 2 phone.

# %%
# Same histogram as above, restricted to medians up to 600 so that the
# remaining devices can be compared.
histogram_median = sns.displot(
    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
)
histogram_median.set(xlim=(0, 600))

# %% [markdown]
# Other medians are much more similar.

# %%
# Standard deviation expressed as a fraction of each device's maximum reading.
df_light_participants["std_rel"] = (
    df_light_participants["std"] / df_light_participants["max"]
)

# %%
sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005)

# %% [markdown]
# Relative variability is homogeneous.
#
# This means that light data needs to be standardized. Min/max standardization would probably fit best.

# %% [markdown]
# # Barometer

# %% [markdown]
# ## Barometer sensor

# %%
df_barometer_sensor_samsung = get_ambient_data(["samsung_0000002"], "barometer_sensor")
df_barometer_sensor_samsung.shape

# %% [markdown]
# Even though we have many values for this sensor, they are all repeated as seen below.

# %%
# Compare rows on every column except the record identifiers, the timestamp
# and the device id. `remove` raises ValueError if an expected column is
# missing, which keeps a schema change from going unnoticed.
barometer_sensor_cols = df_barometer_sensor_samsung.columns.to_list()
for excluded_col in ("id", "_id", "timestamp", "device_id"):
    barometer_sensor_cols.remove(excluded_col)
print(df_barometer_sensor_samsung.drop_duplicates(subset=barometer_sensor_cols))

# %% [markdown]
# ## Barometer data

# %%
df_barometer_samsung = get_ambient_data(["samsung_0000002"], "barometer")
print(df_barometer_samsung)

# %%
df_barometer_inactive = get_ambient_data(participants_inactive_usernames, "barometer")

# %%
df_barometer_inactive.accuracy.value_counts()

# %%
df_barometer_inactive.participant_id.nunique()

# %%
df_barometer_inactive.double_values_0.describe()

# %% [markdown]
# From [Wikipedia](https://en.wikipedia.org/wiki/Atmospheric_pressure#Mean_sea-level_pressure):
#
# > The lowest measurable sea-level pressure is found at the centers of tropical cyclones and tornadoes, with a record low of 870 mbar (87 kPa; 26 inHg).

# %%
# Readings below the record low quoted above.
df_barometer_inactive[df_barometer_inactive["double_values_0"] < 870]

# %%
# Plot the complement of the filter above (>= 870), so that a reading of
# exactly 870 is not dropped from both views.
sns.displot(
    data=df_barometer_inactive[df_barometer_inactive["double_values_0"] >= 870],
    x="double_values_0",
    binwidth=10,
    height=8,
)

# %% [markdown]
# # Temperature data

# %% [markdown]
# ## Temperature sensor

# %% [markdown]
# This table is empty.

# %% [markdown]
# ## Temperature data

# %% [markdown]
# This table is empty.