2021-06-08 11:21:18 +02:00
|
|
|
# ---
|
|
|
|
# jupyter:
|
|
|
|
# jupytext:
|
|
|
|
# formats: ipynb,py:percent
|
|
|
|
# text_representation:
|
|
|
|
# extension: .py
|
|
|
|
# format_name: percent
|
|
|
|
# format_version: '1.3'
|
|
|
|
# jupytext_version: 1.11.2
|
|
|
|
# kernelspec:
|
|
|
|
# display_name: straw2analysis
|
|
|
|
# language: python
|
|
|
|
# name: straw2analysis
|
|
|
|
# ---
|
|
|
|
|
2021-06-11 14:50:14 +02:00
|
|
|
import datetime
|
|
|
|
|
2021-06-08 11:21:18 +02:00
|
|
|
# %%
|
|
|
|
import os
|
|
|
|
import sys
|
2021-06-11 14:50:14 +02:00
|
|
|
|
2021-06-08 11:21:18 +02:00
|
|
|
import pandas as pd
|
2021-06-11 14:50:14 +02:00
|
|
|
import seaborn as sns
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# Put the parent of the current working directory on the import path so
# that the project packages imported further down can be resolved.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
import participants.query_db
|
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Load the three baseline survey exports; the unpacking order matches the
# order of the paths below.
baseline_si, baseline_be_1, baseline_be_2 = map(
    pd.read_csv,
    (
        "E:/STRAWbaseline/results-survey637813.csv",
        "E:/STRAWbaseline/results-survey358134.csv",
        "E:/STRAWbaseline/results-survey413767.csv",
    ),
)
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Ask the participants database for usernames, passing 2020-08-01 as the
# collection start date.
# NOTE(review): exactly how get_usernames filters on collection_start is not
# visible here - confirm against participants.query_db.
collection_start_date = datetime.date.fromisoformat("2020-08-01")
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=collection_start_date
)
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Stack the three survey exports into one frame. join="inner" keeps only the
# columns present in all three files. reset_index(drop=True) renumbers the
# rows 0..n-1 and discards the old per-file index directly, instead of first
# materialising it as an "index" column and dropping it again (which would
# remove a genuine survey column if one happened to be called "index").
baseline = pd.concat(
    [baseline_si, baseline_be_1, baseline_be_2], join="inner"
).reset_index(drop=True)
|
|
|
|
# Keep only the baseline rows whose "Gebruikersnaam" (Dutch: username) value
# appears among the usernames returned by the participants query.
is_listed_participant = baseline["Gebruikersnaam"].isin(
    participants_inactive_usernames
)
baseline_inactive = baseline.loc[is_listed_participant]
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|
|
|
|
# Display the combined baseline frame as the cell output for visual inspection.
baseline
|
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Query the usernames again and wrap them in a pandas Series so that Series
# methods (isin, sort_values, shape) can be used on them in later cells.
queried_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date.fromisoformat("2020-08-01")
)
participants_inactive_usernames = pd.Series(queried_usernames)
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %% [markdown]
|
|
|
|
# # Demographic information
|
|
|
|
|
|
|
|
# %% [markdown]
|
|
|
|
# ## Numerus
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# Print, one per line, the number of matched baseline rows followed by the
# number of usernames returned by the participants query.
print(
    baseline_inactive.shape[0],
    participants_inactive_usernames.shape[0],
    sep="\n",
)
|
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Show, in sorted order, the queried usernames that have no matching
# "Gebruikersnaam" entry in the combined baseline surveys.
absent_from_baseline = ~participants_inactive_usernames.isin(
    baseline["Gebruikersnaam"]
)
participants_inactive_usernames[absent_from_baseline].sort_values()
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|
|
|
|
# Number of matched baseline rows per value of the "startlanguage" column.
baseline_inactive["startlanguage"].value_counts()
|
|
|
|
|
|
|
|
# %%
|
|
|
|
# Number of matched baseline rows per value of the "Geslacht" (Dutch: gender)
# column.
baseline_inactive["Geslacht"].value_counts()
|
|
|
|
|
|
|
|
# %%
|
2021-06-11 14:50:14 +02:00
|
|
|
# Add two derived columns to the matched baseline rows:
#   dob - "Geboortedatum" (Dutch: date of birth) parsed to datetimes
#   age - time elapsed between dob and the moment this cell runs (a timedelta)
# NOTE(review): pd.to_datetime infers the date format here - confirm that the
# survey export is not day-first.
now = pd.Timestamp("now")
birth_dates = pd.to_datetime(baseline_inactive["Geboortedatum"])
baseline_inactive = baseline_inactive.assign(
    dob=birth_dates,
    age=now - birth_dates,
)
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|
|
|
|
# Summary statistics (count, mean, std, min, quartiles, max) of the age column.
baseline_inactive["age"].describe()
|
|
|
|
|
|
|
|
# %%
|
2021-06-14 17:09:45 +02:00
|
|
|
# Divide by 365.25 (average days per year) to express a day count in years.
# NOTE(review): 3618 is presumably a number of days copied by hand from the
# age summary above - confirm, since it is not derived from the data here.
3618 / 365.25
|
2021-06-08 11:21:18 +02:00
|
|
|
|
|
|
|
# %%
|