import json

import numpy as np
import pandas as pd

# Integer codes of the ESM question types, keyed by a readable type name.
# clean_up_esm() compares these codes against the "esm_type" column to decide
# which answers should be parsed as numbers (radio, scale and number).
# NOTE(review): the codes presumably mirror those used by the ESM data source;
# confirm before changing or renumbering any of them.
ESM_TYPE = {
    "text": 1,
    "radio": 2,
    "checkbox": 3,
    "likert": 4,
    "quick_answers": 5,
    "scale": 6,
    "datetime": 7,
    "pam": 8,
    "number": 9,
    "web": 10,
    "date": 11,
}

# Integer IDs of the questionnaires, keyed by questionnaire name.
# A questionnaire with a single scale maps directly to its ID (an int);
# a questionnaire with several subscales maps to a nested dict of
# subscale name -> ID. Note that the IDs are not contiguous (1, 8-26, 87-99).
# NOTE(review): not referenced in the visible part of this file; presumably
# matched against a questionnaire ID field of the ESM data - confirm with callers.
QUESTIONNAIRE_IDS = {
    "sleep_quality": 1,
    "PANAS": {
        "positive_affect": 8,
        "negative_affect": 9
    },
    "job_content_questionnaire": {
        "job_demand": 10,
        "job_control": 11,
        "supervisor_support": 12,
        "coworker_support": 13,
    },
    "PFITS": {
        "supervisor": 14,
        "coworkers": 15
    },
    "UWES": {
        "vigor": 16,
        "dedication": 17,
        "absorption": 18
    },
    "COPE": {
        "active": 19,
        "support": 20,
        "emotions": 21
    },
    "work_life_balance": {
        "life_work": 22,
        "work_life": 23
    },
    "recovery_experience": {
        "detachment": 24,
        "relaxation": 25
    },
    "symptoms": 26,
    "stress_appraisal": {
        "stressfulness_event": 87,
        "threat": 88,
        "challenge": 89,
        "event_time": 90,
        "event_duration": 91,
        "event_work_related": 92,
        "stressfulness_period": 93,
    },
    "late_work": 94,
    "work_hours": 95,
    "left_work": 96,
    "activities": 97,
    "coffee_breaks": 98,
    "at_work_yet": 99,
}

# Value of the "esm_status" column that marks an answered ESM.
# clean_up_esm() keeps only the rows with this status.
ESM_STATUS_ANSWERED = 2

# Column names which, judging by the constant's name, identify one ESM session.
# NOTE(review): not used in the visible part of this file - confirm with callers.
GROUP_SESSIONS_BY = ["participant_id", "device_id", "esm_session"]

# String labels for the status of a session.
# NOTE(review): not used in the visible part of this file; presumably assigned
# when whole sessions are classified - confirm with callers.
SESSION_STATUS_UNANSWERED = "ema_unanswered"
SESSION_STATUS_DAY_FINISHED = "day_finished"
SESSION_STATUS_COMPLETE = "ema_completed"

# Special values of "esm_user_answer" that do not represent a real answer.
# clean_up_esm() drops every row whose answer equals one of these.
ANSWER_DAY_FINISHED = "DayFinished3421"
ANSWER_DAY_OFF = "DayOff3421"
ANSWER_SET_EVENING = "DayFinishedSetEvening"

MAX_MORNING_LENGTH = 3
# When the participant was not yet at work at the time of the first (morning) EMA,
# only three items were answered:
# two sleep-related items and one indicating that work had NOT started yet.
# Daytime EMAs are all longer; in fact, they always consist of at least 6 items.


def preprocess_esm(df_esm: pd.DataFrame) -> pd.DataFrame:
    """
    Expand the JSON column of ESM data into separate Pandas DF columns.

    Every value of the "esm_json" column is parsed with json.loads and the
    resulting records are flattened with pd.json_normalize, so that each JSON
    field becomes its own column (nested fields get dot-separated names).
    The new columns are appended to the original dataframe row by row.

    Note that no timestamp conversion is performed by this function.

    Parameters
    ----------
    df_esm: pd.DataFrame
        A dataframe of esm data. It must contain an "esm_json" column
        holding one JSON document (as a string) per row.

    Returns
    -------
    df_esm_preprocessed: pd.DataFrame
        The original dataframe (same rows, same index) with added columns:
        all fields from the esm_json column except esm_trigger.

    Raises
    ------
    ValueError
        Raised by DataFrame.join if a JSON field other than esm_trigger
        has the same name as an existing column of df_esm.
    """
    # Pass a plain list so that json_normalize behaves the same in all pandas versions.
    esm_records = df_esm["esm_json"].apply(json.loads).tolist()
    df_esm_json = pd.json_normalize(esm_records).drop(
        columns=["esm_trigger"], errors="ignore"
    )  # The esm_trigger column is already present in the main df.
    # json_normalize numbers its rows 0..n-1, whereas join() aligns on index labels.
    # Re-attach the original index, otherwise a dataframe with a non-default index
    # (e.g. one that has been filtered before) gets NaNs or fields of the wrong row.
    df_esm_json.index = df_esm.index
    return df_esm.join(df_esm_json)


def clean_up_esm(df_esm_preprocessed: pd.DataFrame) -> pd.DataFrame:
    """
    This function eliminates invalid ESM responses.
    It removes unanswered ESMs and those that indicate end of work and similar.
    It also extracts a numeric answer from strings such as "4 - I strongly agree".

    The numeric answer is the whole integer at the start of the answer,
    so that "10 - Always" yields 10 and "12" yields 12.
    It is only extracted for ESMs of types radio, scale, and number.

    Parameters
    ----------
    df_esm_preprocessed: pd.DataFrame
        A preprocessed dataframe of esm data.
        It must contain the columns esm_status, esm_user_answer, and esm_type.

    Returns
    -------
    df_esm_clean: pd.DataFrame
        A subset of the original dataframe (a copy, the input is not modified)
        with an added float column esm_user_answer_numeric.
        The column is NaN for ESMs of other types and for answers
        that do not start with an integer.

    """
    is_answered = df_esm_preprocessed["esm_status"] == ESM_STATUS_ANSWERED
    is_special_answer = df_esm_preprocessed["esm_user_answer"].isin(
        [ANSWER_DAY_FINISHED, ANSWER_DAY_OFF, ANSWER_SET_EVENING]
    )
    # Work on an explicit copy, so that adding a column below never writes
    # into (a view of) the caller's dataframe and triggers no SettingWithCopyWarning.
    df_esm_clean = df_esm_preprocessed[is_answered & ~is_special_answer].copy()

    df_esm_clean["esm_user_answer_numeric"] = np.nan
    esm_type_numeric = [
        ESM_TYPE["radio"],
        ESM_TYPE["scale"],
        ESM_TYPE["number"],
    ]
    is_numeric_type = df_esm_clean["esm_type"].isin(esm_type_numeric)

    # Take all leading digits (with an optional minus sign), not just the first
    # character: slicing off one character would turn "10 - ..." or "12" into 1.
    # Answers without a leading integer (including missing ones) are left as NaN.
    leading_integer = (
        df_esm_clean.loc[is_numeric_type, "esm_user_answer"]
        .astype(str)
        .str.extract(r"^\s*(-?\d+)", expand=False)
    )
    df_esm_clean.loc[
        is_numeric_type, "esm_user_answer_numeric"
    ] = leading_integer.astype(float)
    return df_esm_clean