# stress_at_work_analysis/exploration/expl_baseline.py (77 lines, 1.9 KiB, Python)

# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.11.2
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---
# %%
import os
import sys
import datetime
import seaborn as sns
import pandas as pd
# Make the parent of the current working directory importable, so that the
# package import that follows can be resolved from there.
# NOTE(review): presumably the notebook is started from a sub-folder of the
# repository root — confirm.
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import participants.query_db
# %%
# Read the three baseline survey exports from the local STRAWbaseline folder.
# NOTE(review): "si" / "be" presumably denote the two national samples —
# confirm against the survey IDs.
baseline_si, baseline_be_1, baseline_be_2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "E:/STRAWbaseline/results-survey637813.csv",
        "E:/STRAWbaseline/results-survey358134.csv",
        "E:/STRAWbaseline/results-survey413767.csv",
    )
)
# %%
# Usernames returned by the participants database for a collection start of
# 2020-08-01.
# NOTE(review): how `collection_start` filters participants is defined in
# participants.query_db.get_usernames — confirm what "inactive" means here.
participants_inactive_usernames = participants.query_db.get_usernames(
    collection_start=datetime.date.fromisoformat("2020-08-01")
)
# %%
# Stack the three survey exports, keeping only the columns they all share
# (join="inner"). ignore_index=True renumbers the rows 0..n-1 directly, so no
# temporary "index" column is created and a genuine survey column that happens
# to be called "index" cannot be dropped by accident.
baseline = pd.concat(
    [baseline_si, baseline_be_1, baseline_be_2],
    join="inner",
    ignore_index=True,
)
# Keep only the respondents whose username ("Gebruikersnaam") is among the
# usernames fetched at the top of this block.
baseline_inactive = baseline[
    baseline["Gebruikersnaam"].isin(participants_inactive_usernames)
]
# %%
baseline
# %%
# Reuse the usernames already fetched for the baseline filter instead of
# sending the identical query to the database a second time. Wrapping them in
# a Series provides the .isin / .sort_values operations used in later cells
# and keeps this list identical to the one baseline_inactive was built from.
participants_inactive_usernames = pd.Series(participants_inactive_usernames)
# %% [markdown]
# # Demographic information
# %% [markdown]
# ## Numerus
# %%
# Number of baseline rows matched to a queried username ...
print(len(baseline_inactive))
# ... compared with the number of usernames the query returned.
print(len(participants_inactive_usernames))
# %%
# Queried usernames that have no row in the combined baseline surveys,
# listed in sorted order.
participants_inactive_usernames.loc[
    ~participants_inactive_usernames.isin(baseline["Gebruikersnaam"])
].sort_values()
# %%
# Respondents per value of the "startlanguage" column.
baseline_inactive.loc[:, "startlanguage"].value_counts()
# %%
# Respondents per value of the "Geslacht" (Dutch: gender) column.
baseline_inactive.loc[:, "Geslacht"].value_counts()
# %%
# Reference instant against which ages are measured.
now = pd.Timestamp("now")
# Parse "Geboortedatum" (Dutch: date of birth) into `dob`, then store the time
# elapsed between `dob` and `now` as the Timedelta column `age`.
# NOTE(review): the date format of Geboortedatum is not visible here — confirm
# that pd.to_datetime's default parsing reads it correctly.
baseline_inactive = baseline_inactive.assign(
    dob=lambda df: pd.to_datetime(df.Geboortedatum),
    age=lambda df: now - df.dob,
)
# %%
# Summary statistics of age as Timedelta values (i.e. expressed in days).
baseline_inactive["age"].describe()
# %%
# The same summary expressed in years. It is computed from the data rather
# than by retyping one day count from the output above (previously the
# hard-coded 3710/365.25), so it stays correct when the data or `now` change.
(baseline_inactive["age"].dt.days / 365.25).describe()
# %%