# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.11.2
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

import datetime

# %%
import os
import sys

import pandas as pd
import seaborn as sns

# Make the parent of the current working directory importable so that the
# project-local `participants` package can be found when the notebook is run
# from a sub-folder.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import participants.query_db

# %%
# Folder holding the exported baseline survey results.
BASELINE_DIR = "E:/STRAWbaseline"
# Cut-off date handed to `get_usernames` as `collection_start`.
COLLECTION_START = datetime.date.fromisoformat("2020-08-01")

# NOTE(review): the `_si` / `_be_*` suffixes presumably denote the Slovenian
# and the two Belgian survey exports -- confirm against the data source.
baseline_si = pd.read_csv(f"{BASELINE_DIR}/results-survey637813.csv")
baseline_be_1 = pd.read_csv(f"{BASELINE_DIR}/results-survey358134.csv")
baseline_be_2 = pd.read_csv(f"{BASELINE_DIR}/results-survey413767.csv")

# %%
# Usernames selected by the database query for the given collection start.
# NOTE(review): the variable name suggests these are *inactive* participants;
# the exact selection criteria live in `participants.query_db` -- confirm.
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=COLLECTION_START
)

# %%
# Stack the three surveys, keeping only the columns they all share
# (join="inner"), and renumber the rows 0..n-1 without keeping the old index.
baseline = pd.concat(
    [baseline_si, baseline_be_1, baseline_be_2], join="inner"
).reset_index(drop=True)

# Survey rows whose username ("Gebruikersnaam") is among the queried usernames.
baseline_inactive = baseline[
    baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
]

# %%
baseline

# %%
# Wrap the usernames fetched above in a Series so that `.shape`, `.isin` and
# `.sort_values` are available below; the database is not queried a second
# time.
participants_inactive_usernames = pd.Series(participants_inactive_usernames)

# %% [markdown]
# # Demographic information

# %% [markdown]
# ## Numerus

# %%
# Number of matched survey rows vs. number of queried usernames.
print(baseline_inactive.shape[0])
print(participants_inactive_usernames.shape[0])

# %%
# Queried usernames that have no row in the combined baseline survey.
participants_inactive_usernames[
    ~participants_inactive_usernames.isin(baseline["Gebruikersnaam"])
].sort_values()

# %%
baseline_inactive["startlanguage"].value_counts()

# %%
# "Geslacht" is Dutch for gender.
baseline_inactive["Geslacht"].value_counts()

# %%
now = pd.Timestamp("now")
# `dob` is the parsed date of birth ("Geboortedatum"); `age` is the elapsed
# time since then as a Timedelta (i.e. expressed in days, not years).
baseline_inactive = baseline_inactive.assign(
    dob=lambda x: pd.to_datetime(x.Geboortedatum), age=lambda x: now - x.dob
)

# %%
baseline_inactive["age"].describe()

# %%
# NOTE(review): 3618 is presumably a day count read off the `describe()`
# output above, converted here to years -- confirm which statistic it is.
3618 / 365.25

# %%