Explore baseline screening data in a notebook.
parent
d5cd76f05a
commit
7b88f62e0a
|
@ -0,0 +1,75 @@
|
|||
# ---
|
||||
# jupyter:
|
||||
# jupytext:
|
||||
# formats: ipynb,py:percent
|
||||
# text_representation:
|
||||
# extension: .py
|
||||
# format_name: percent
|
||||
# format_version: '1.3'
|
||||
# jupytext_version: 1.11.2
|
||||
# kernelspec:
|
||||
# display_name: straw2analysis
|
||||
# language: python
|
||||
# name: straw2analysis
|
||||
# ---
|
||||
|
||||
# %%
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import seaborn as sns
|
||||
import pandas as pd
|
||||
|
||||
# Put the parent of the current working directory on the import path.
# Presumably this lets the `participants` package imported below resolve
# when the notebook is run from a subdirectory -- confirm.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
|
||||
|
||||
import participants.query_db
|
||||
|
||||
# %%
|
||||
# Load the first baseline survey export (survey 637813).
# NOTE(review): hard-coded absolute Windows path; adjust for your environment.
# Presumably "si" marks the Slovenian survey -- confirm.
baseline_si = pd.read_csv('E:/STRAWbaseline/results-survey637813.csv')
|
||||
# Load the second baseline survey export (survey 358134).
# NOTE(review): hard-coded absolute Windows path; adjust for your environment.
# Presumably "be" marks the Belgian survey -- confirm.
baseline_be = pd.read_csv('E:/STRAWbaseline/results-survey358134.csv')
|
||||
|
||||
# %%
|
||||
# Usernames returned by the participant database query for
# collection_start=2020-08-01. Wrapped in a Series right away so that the
# cells further down can call .isin() / .shape on it; DataFrame.isin-style
# membership tests accept a Series just as well as a plain sequence.
participants_inactive_usernames = pd.Series(
    participants.query_db.get_usernames(
        collection_start=datetime.date.fromisoformat("2020-08-01")
    )
)
|
||||
|
||||
# %%
|
||||
# Stack both survey exports, keeping only the columns they have in common
# (join="inner"), and renumber the rows 0..n-1. ignore_index=True does the
# renumbering directly and, unlike reset_index().drop(columns="index"),
# cannot accidentally drop a real data column that happens to be named "index".
baseline = pd.concat([baseline_si, baseline_be], join="inner", ignore_index=True)
|
||||
# Keep only the survey rows whose "Gebruikersnaam" value (Dutch: username)
# is among the usernames returned by the participant query.
baseline_inactive = baseline[baseline["Gebruikersnaam"].isin(participants_inactive_usernames)]
|
||||
|
||||
# %%
|
||||
# Notebook cell output: show the combined (unfiltered) survey table.
baseline
|
||||
|
||||
# %%
|
||||
# Re-bind the usernames as a pandas Series so that .shape and .isin() can be
# used on them below.
# NOTE(review): this repeats the get_usernames query from the earlier cell
# with the same collection_start; the two could share one call.
participants_inactive_usernames = pd.Series(participants.query_db.get_usernames(collection_start=datetime.date.fromisoformat("2020-08-01")))
|
||||
|
||||
# %% [markdown]
|
||||
# # Demographic information
|
||||
|
||||
# %% [markdown]
|
||||
# ## Numerus
|
||||
|
||||
# %%
|
||||
# Compare how many survey rows matched a queried username with how many
# usernames the query returned; a difference means some usernames have no
# (or more than one) baseline survey row.
print(len(baseline_inactive))
print(len(participants_inactive_usernames))
|
||||
|
||||
# %%
|
||||
# Queried usernames that do not appear in the "Gebruikersnaam" column of the
# combined survey data, sorted for easier reading.
participants_inactive_usernames[~participants_inactive_usernames.isin(baseline["Gebruikersnaam"])].sort_values()
|
||||
|
||||
# %%
|
||||
# Number of matched survey rows per value of the "startlanguage" column.
baseline_inactive["startlanguage"].value_counts()
|
||||
|
||||
# %%
|
||||
# Number of matched survey rows per value of "Geslacht" (Dutch: gender).
baseline_inactive["Geslacht"].value_counts()
|
||||
|
||||
# %%
|
||||
# Derive two columns: `dob`, the "Geboortedatum" column (Dutch: date of birth)
# parsed to datetimes, and `age`, the time elapsed between `dob` and the
# moment this cell runs. `age` is a timedelta, not a number of years.
# assign() evaluates keyword arguments in order, so `age` can use `dob`.
now = pd.Timestamp("now")
baseline_inactive = baseline_inactive.assign(
    dob=lambda df: pd.to_datetime(df["Geboortedatum"]),
    age=lambda df: now - df["dob"],
)
|
||||
|
||||
# %%
|
||||
# Summary statistics of `age`; since the column holds timedeltas, the
# statistics are reported as durations (days), not years.
baseline_inactive["age"].describe()
|
||||
|
||||
# %%
|
||||
# Convert 3710 days to years (365.25 days per year).
# NOTE(review): 3710 looks like a value copied by hand from the describe()
# output above -- confirm which statistic it is and recompute if data change.
3710/365.25
|
||||
|
||||
# %%
|
Loading…
Reference in New Issue