From 9580533c1418b9a7e8b545308d541188c922c6f0 Mon Sep 17 00:00:00 2001
From: junos
Date: Fri, 23 Jul 2021 18:50:17 +0200
Subject: [PATCH] Explore between-participants differences in light.

---
Reviewer notes (this region is not part of the commit message):
  * The added cells aggregate double_light_lux per (participant_id,
    device_id) into mean/median/std/min/max, flatten the resulting column
    MultiIndex to its second level, and plot the distributions of the
    per-group mean, max and median.
  * std_rel is computed as std / max for each group.
    NOTE(review): this is undefined (NaN or inf) for any group whose max
    is 0 - confirm that no such group exists in df_light_inactive.
  * The first context line of the hunk contains the typo "It does happens".
    It has to stay as is for the patch to apply; fix it in a follow-up
    commit on the target file.

 exploration/expl_light.py | 64 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/exploration/expl_light.py b/exploration/expl_light.py
index b24ae26..65a06b0 100644
--- a/exploration/expl_light.py
+++ b/exploration/expl_light.py
@@ -102,3 +102,67 @@ df_light_nokia.loc[df_light_nokia["double_light_lux"] == 0, ["datetime_lj"]]
 
 # %% [markdown]
 # Zeroes are present during the day. It does happens when the sensor is physically blocked.
+# %% [markdown]
+# # Differences between participants
+
+# %%
+df_light_participants = (
+    df_light_inactive[["participant_id", "device_id", "double_light_lux"]]
+    .groupby(["participant_id", "device_id"])
+    .agg(["mean", "median", "std", "min", "max"])
+    .reset_index(col_level=1)
+)
+df_light_participants.columns = df_light_participants.columns.get_level_values(1)
+
+# %%
+df_light_participants[df_light_participants["min"] > 0]
+
+# %%
+df_light_inactive[
+    df_light_inactive["device_id"] == "3188b03e-8b6f-45da-894e-769eed81bbda"
+].shape
+
+# %% [markdown]
+# This was a Lenovo Vibe K6, but the small range of values is due to a reinstallation shortly after the first (unsuccessful) installation.
+
+# %%
+sns.displot(data=df_light_participants, x="mean", binwidth=0.1, log_scale=(True, False))
+
+# %%
+sns.displot(data=df_light_participants, x="max", binwidth=0.1, log_scale=(True, False))
+
+# %% [markdown]
+# Variability in means is probably due to variability in maxima.
+
+# %%
+histogram_median = sns.displot(
+    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
+)
+
+# %%
+df_light_participants[df_light_participants["median"] > 1e4]
+
+# %% [markdown]
+# This was a Cubot KingKong Mini 2 phone.
+
+# %%
+histogram_median = sns.displot(
+    data=df_light_participants, x="median", binwidth=50, log_scale=(False, False)
+)
+histogram_median.set(xlim=(0, 600))
+
+# %% [markdown]
+# Other medians are much more similar.
+
+# %%
+df_light_participants["std_rel"] = (
+    df_light_participants["std"] / df_light_participants["max"]
+)
+
+# %%
+sns.displot(data=df_light_participants, x="std_rel", binwidth=0.005)
+
+# %% [markdown]
+# Relative variability (std divided by max, per participant and device) is homogeneous.
+#
+# This means that light data needs to be standardized. Min/max standardization would probably fit best.