stress_at_work_analysis/participants/prepare_usernames_file.py

70 lines
2.2 KiB
Python

import datetime
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
sys.path.append(nb_dir)
import pandas as pd
from features.timezone import get_timezone_data
from pyprojroot import here
import participants.query_db
participants_inactive_usernames = participants.query_db.get_usernames(
tester=False, # True participants are wanted.
active=False, # They have all finished their participation.
collection_start=datetime.date.fromisoformat(
"2020-08-01"
), # This is the timeframe of the main study.
last_upload=datetime.date.fromisoformat("2021-09-01"),
)
participants_overview_si = pd.read_csv(
snakemake.params["baseline_folder"] + "Participants_overview_Slovenia.csv", sep=";"
)
participants_overview_be = pd.read_csv(
snakemake.params["baseline_folder"]+ "Participants_overview_Belgium.csv", sep=";"
)
participants_true_si = participants_overview_si[
participants_overview_si["Wristband_SerialNo"] != "DECLINED"
]
participants_true_be = participants_overview_be[
participants_overview_be["SmartphoneBrand+Generation"].str.slice(0, 3) != "Not"
]
# Concatenate participants from both countries.
participants_usernames_empatica = pd.concat(
[participants_true_be, participants_true_si]
)
# Filter only the participants from the main study (queried from the database).
participants_usernames_empatica = participants_usernames_empatica[
participants_usernames_empatica["Username"].isin(participants_inactive_usernames)
]
# Rename and select columns.
participants_usernames_empatica = participants_usernames_empatica.rename(
columns={"Username": "label", "Wristband_SerialNo": "empatica_id"}
)[["label", "empatica_id"]]
# Adapt for csv export.
participants_usernames_empatica["empatica_id"] = participants_usernames_empatica[
"empatica_id"
].str.replace(",", ";")
participants_usernames_empatica.to_csv(
snakemake.output["usernames_file"],
header=True,
index=False,
line_terminator="\n",
)
timezone_df = get_timezone_data(participants_inactive_usernames)
timezone_df.to_csv(
snakemake.output["timezone_file"],
header=True,
index=False,
line_terminator="\n",
)