# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.11.2
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

# %%
import datetime
import os
import sys
from pathlib import Path

import pandas as pd
import seaborn as sns

# Put the parent of the current working directory on sys.path so that the
# project-local `participants` package below can be imported.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

import participants.query_db  # noqa: E402  (must follow the sys.path tweak)

# %%
# Directory holding the two baseline survey exports read below.
BASELINE_DIR = Path("E:/STRAWbaseline")
# Cut-off date handed to the username query.
COLLECTION_START = datetime.date.fromisoformat("2020-08-01")
# Length of one year used to express ages (Timedeltas) in years.
ONE_YEAR = pd.Timedelta(days=365.25)

baseline_si = pd.read_csv(BASELINE_DIR / "results-survey637813.csv")
baseline_be = pd.read_csv(BASELINE_DIR / "results-survey358134.csv")

# %%
# Query the usernames once and keep them as a Series: the Series works both as
# the `isin` filter below and for the set-difference / count cells further down.
participants_inactive_usernames = pd.Series(
    participants.query_db.get_usernames(collection_start=COLLECTION_START)
)

# %%
# Stack both exports, keeping only the columns they share (join="inner").
# ignore_index=True yields a fresh 0..n-1 index directly, without the
# reset_index()/drop("index") round trip, which would drop a genuine data
# column if an export ever contained one named "index".
baseline = pd.concat([baseline_si, baseline_be], join="inner", ignore_index=True)

# Baseline rows whose "Gebruikersnaam" (username) is among the queried usernames.
baseline_inactive = baseline[
    baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
]

# %%
baseline

# %% [markdown]
# # Demographic information

# %% [markdown]
# ## Numerus

# %%
# Matched baseline rows vs. number of queried usernames.
print(baseline_inactive.shape[0])
print(participants_inactive_usernames.shape[0])

# %%
# Queried usernames that have no row in the baseline data.
missing_from_baseline = ~participants_inactive_usernames.isin(
    baseline["Gebruikersnaam"]
)
participants_inactive_usernames[missing_from_baseline].sort_values()

# %%
baseline_inactive["startlanguage"].value_counts()

# %%
baseline_inactive["Geslacht"].value_counts()

# %%
now = pd.Timestamp("now")
# `dob` is the parsed "Geboortedatum" column; `age` is the elapsed time since
# then as a Timedelta (assign evaluates keywords in order, so `age` can use
# the freshly created `dob`).
baseline_inactive = baseline_inactive.assign(
    dob=lambda df: pd.to_datetime(df.Geboortedatum),
    age=lambda df: now - df.dob,
)

# %%
baseline_inactive["age"].describe()

# %%
# Same summary expressed in years. This replaces a hand-typed `3710/365.25`
# (presumably a day count copied from the summary above), so the conversion
# stays correct when the data or the run date changes.
(baseline_inactive["age"] / ONE_YEAR).describe()

# %%