diff --git a/exploration/ex_ml_pipeline.py b/exploration/ex_ml_pipeline.py index fec5717..328513a 100644 --- a/exploration/ex_ml_pipeline.py +++ b/exploration/ex_ml_pipeline.py @@ -99,9 +99,7 @@ df_esm_PANAS_daily_means = ( # %% -df_proximity_daily_counts = proximity.count_proximity( - df_proximity, ["date_lj"] -) +df_proximity_daily_counts = proximity.count_proximity(df_proximity, ["date_lj"]) # %% df_proximity_daily_counts diff --git a/machine_learning/prox_comm_PANAS_nb.ipynb b/machine_learning/prox_comm_PANAS_nb.ipynb new file mode 100644 index 0000000..d10de82 --- /dev/null +++ b/machine_learning/prox_comm_PANAS_nb.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "25ba2626-2b93-48e7-b9cc-551fe03335f4", + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import yaml\n", + "from sklearn import linear_model\n", + "from sklearn.model_selection import LeaveOneGroupOut, cross_val_score\n", + "import os\n", + "import importlib\n", + "import matplotlib.pyplot as plt\n", + "import sys\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "nb_dir = os.path.split(os.getcwd())[0]\n", + "if nb_dir not in sys.path:\n", + " sys.path.append(nb_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b429e654-b065-4ea7-9dac-955584f7a016", + "metadata": {}, + "outputs": [], + "source": [ + "from machine_learning import pipeline, features_sensor, labels, model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5f2a92e0-d6ea-49a1-9f06-d808c1bd57e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "importlib.reload(labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "948cb320-f2c1-46a2-a42d-ab12894d321a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SensorFeatures initialized.\n", + "Calculating features ...\n", + "Read proximity features from the file.\n", + "Read communication features from the file.\n" + ] + } + ], + "source": [ + "with open(\"./config/prox_comm_PANAS_features.yaml\", \"r\") as file:\n", + " sensor_features_params = yaml.safe_load(file)\n", + "sensor_features = features_sensor.SensorFeatures(**sensor_features_params)\n", + "#sensor_features.set_sensor_data()\n", + "sensor_features.calculate_features(cached=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "872679f6-e343-4d2a-bfc5-e4e3d224c766", + "metadata": {}, + "outputs": [], + "source": [ + "all_features = sensor_features.get_features(\"all\",\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "52f0f3cb-733a-4345-ab36-e52dc3c5a76c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Labels initialized.\n", + "Aggregating labels ...\n", + "Read labels from the file.\n" + ] + } + ], + "source": [ + "with open(\"./config/prox_comm_PANAS_labels.yaml\", \"r\") as file:\n", + " labels_params = yaml.safe_load(file)\n", + "labels_current = labels.Labels(**labels_params)\n", + "#labels_current.set_labels()\n", + "labels_current.aggregate_labels(cached=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c366516a-6aa6-4101-a18d-0dc35f597d87", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ModelValidation initialized.\n", + "Validation method set.\n" + ] + } + ], + "source": [ + "model_validation = model.ModelValidation(\n", + " sensor_features.get_features(\"all\", \"all\"),\n", + " labels_current.get_aggregated_labels(),\n", + " group_variable=\"participant_id\",\n", + " cv_name=\"loso\",\n", + ")\n", + "model_validation.model = linear_model.LinearRegression()\n", + "model_validation.set_cv_method()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "0eab568d-ad7f-4243-be05-26bafb310c5c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running cross validation ...\n" + ] + } + ], + "source": [ + "model_loso_r2 = model_validation.cross_validate()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fde0151b-c259-45e8-af2e-94f37edf0b01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-8.50176280e-02 -3.66239404e-02 -5.77416259e-01 -9.06942628e-01\n", + " -3.13084482e+00 -5.25290051e-02 -2.32414699e+00 -7.65972845e+00\n", + " -3.65181380e+00 -9.62417876e+00 -7.44270369e-02 6.78427260e-02\n", + " -5.69919784e-01 -9.03242379e-01 -1.21151912e-01 -5.13453030e+00\n", + " -1.60384696e+00 -3.19062741e+00 -6.63847516e-01 -9.90156817e-02\n", + " -7.72057926e-01 -4.90843105e+01 3.69446095e-01 -2.08765985e+00\n", + " -1.99641377e+00 -1.29034837e+03 -3.21364491e+00 -5.77331614e-01\n", + " 0.00000000e+00 -6.84298747e-03 -1.63138097e+01 -1.66204067e+00\n", + " -2.27751119e-01 -1.33661361e+00 -9.81485624e-01 -8.49005069e+00\n", + " -1.54261232e+01 -1.07208976e+01 -5.94109632e-01 -1.46186838e-01\n", + " -3.35992820e-01 -1.56058931e-01 -4.30691060e+00 -4.02218511e+00\n", + " -1.76476411e+01 -4.87642855e-02 -5.30649694e+00 -2.17399142e-01\n", + " -4.13520657e-01 -3.86148143e+00 -8.01412328e-01 -8.23760834e+00\n", + " -2.06664107e+00 -1.18230651e+00 -2.06091099e-02 -9.97601126e-02]\n", + "-26.655054402780422\n" + ] + } + ], + "source": [ + "print(model_loso_r2)\n", + "print(np.mean(model_loso_r2))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0435685e-9998-4eff-a3ee-6edc781dde81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.06784273, 0.36944609])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_loso_r2[model_loso_r2 > 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "c4560ac5-8c83-43d0-b6e0-b03dfd19c1c3", + "metadata": {}, + "outputs": [], + "source": [ + "logo = LeaveOneGroupOut()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "75dc95ca-ad77-4e36-bc8b-653d3b73037f", + "metadata": {}, + "outputs": [], + "source": [ + "try_X = model_validation.X.reset_index().drop([\"participant_id\",\"date_lj\"], axis=1)\n", + "try_y = model_validation.y.reset_index().drop([\"participant_id\",\"date_lj\"], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8a58963d-a10b-468a-ae82-6395e8b2e7b5", + "metadata": {}, + "outputs": [], + "source": [ + "model_loso_mean_absolute_error = -1 * cross_val_score(\n", + "estimator=model_validation.model,\n", + "X=try_X,\n", + "y=try_y,\n", + "groups=model_validation.groups,\n", + "cv=logo.split(X=try_X, y=try_y, groups=model_validation.groups), \n", + "scoring='neg_mean_absolute_error'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c98e13d6-734f-4adc-909b-c4a400a01d3e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.43618444, 0.39780929, 0.56970789, 0.38847095, 0.78244762,\n", + " 0.29847855, 0.4348883 , 1.80633684, 0.29097794, 0.53106755,\n", + " 0.32757327, 0.37845186, 0.30228743, 0.34129752, 0.2555845 ,\n", + " 1.27981007, 0.40270591, 0.35411635, 0.2568122 , 0.5820276 ,\n", + " 0.33293713, 0.47789249, 0.19690204, 0.68629304, 0.67457704,\n", + " 13.0369228 , 0.41234072, 0.31384332, 0.45126702, 0.34806906,\n", + " 0.52854722, 0.28707449, 0.28282637, 0.49286602, 0.26406791,\n", + " 0.39567315, 0.33661383, 1.23764371, 0.43788937, 0.32592072,\n", + " 0.47443271, 0.55999948, 0.50408039, 0.40523803, 0.50241167,\n", + " 0.30617356, 0.31461521, 0.28494495, 0.32278505, 0.29084659,\n", + " 0.47211231, 0.33807521, 0.34608592, 0.40624902, 0.22882316,\n", + " 0.45563856])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_loso_mean_absolute_error" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cd821657-cc18-46f3-92d1-b331b863790f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.39674122009711504" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.median(model_loso_mean_absolute_error)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ead0d898-8a96-404d-a895-b213771dc7ea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_validation.model.fit(try_X, try_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "406d82e7-488c-46a3-8426-ca49e01993f5", + "metadata": {}, + "outputs": [], + "source": [ + "Y_predicted = model_validation.model.predict(try_X)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "310c6287-7d6e-4261-8c9c-3c592c822bd1", + "metadata": {}, + "outputs": [], + "source": [ + "try_y.rename(columns={\"NA\": \"NA_true\"}, inplace=True)\n", + "try_y[\"NA_predicted\"] = Y_predicted\n", + "NA_long = pd.wide_to_long(\n", + " try_y.reset_index(),\n", + " i=\"index\",\n", + " j=\"value\",\n", + " stubnames=\"NA\",\n", + " sep=\"_\",\n", + " suffix=\".+\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "62f9312d-f9d7-403c-89c0-5c04d05e76bd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg4AAAFgCAYAAADTrfGOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAgFklEQVR4nO3de7xXdZ3v8ddHpLxgaonEyOYyDlmSCsh4GewyOiqTllmWONVoNlGOpmZnJi+PU/qY9JgaleNJD43Xk5fIcmR6NAwmeDiVqWiYoJKoCCgj2y4CR0OBz/nj94PZwIb93dv9u+zffj0fj9+D31rru36/z5el7Pf+rrW+KzITSZKkEjs0ugBJktR3GBwkSVIxg4MkSSpmcJAkScUMDpIkqdiOjS7gjZg0aVLOnDmz0WVIktRqYlsb+vSIw0svvdToEiRJ6lf6dHCQJEn1ZXCQJEnFDA6SJKmYwUGSJBUzOEiSpGIGB0mSVMzgIEmSihkcJElSMYODJEkqZnCQJEnFDA6SJKmYwUGSJBUzOEiSpGIGh35u5PBhRETxa+TwYY0uWZLUQDs2ugA11nPLnidnX1bcPo68sIbVSJKanSMOkiSpmMFBkiQVMzhIkqRiBgdJklTM4CBJkooZHCRJUjGDgyRJKmZwkCRJxQwOkiSpWM2CQ0S0RcSciHgiIhZGxDnV9RdHxPMRMb/6+kCHfS6IiMURsSgijq1VbZIkqWdqOeX0OuBLmflIROwGPBwR91S3fTMzr+rYOCL2ByYDY4A/AX4aEe/IzPU1rFGSJHVDzUYcMnNFZj5Sfb8aeALYZzu7nADckZlrM/NZYDFwSK3qkyRJ3VeXaxwiYiQwDniguuqsiPh1RNwQEXtW1+0DLOuw23K2HzQkSVKd1Tw4RMQg4IfAuZm5CrgW2BcYC6wAvrGxaSe7ZyefNyUi5kXEvPb29toULUmSOlXT4BARA6mEhlsz80cAmfliZq7PzA3Ad/mv0xHLgbYOuw8DXtjyMzNzWmZOyMwJgwcPrmX5kiRpC7W8qyKA64EnMnNqh/VDOzQ7EVhQfT8DmBwRb46IUcBo4MFa1SdJkrqvlndVTAQ+BTwWEfOr6y4ETomIsVROQywBPgeQmQsjYjrwOJU7Ms70jgpJkppLzYJDZv6Mzq9b+Ml29rkUuLRWNUmSpDfGmSMlSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOAgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOAgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzODQYkYOH0ZEFL8kSeqOHRtdgHrXc8ueJ2dfVtw+jrywhtVIklqNIw6SJKmYwUGSJBUzOEiSpGIGB0mSVMzgIEmSihkcJElSMYODJEkqZnCQJEnFDA6SJKmYwUGSJBUzOEiSpGIGB0mSVMzgIEmSihkcJElSMYODJEkqZnCQJEnFDA6SJKmYwUGSJBUzOEiSpGIGB0mSVMzgIEmSihkcJElSsZoFh4hoi4g5EfFERCyMiHOq698aEfdExFPVP/fssM8FEbE4IhZFxLG1qk2SJPVMLUcc1gFfysx3AYcBZ0bE/sD5wL2ZORq4t7pMddtkYAwwCfhORAyoYX2SJKmbahYcMnNFZj5Sfb8aeALYBzgBuLna7Gbgw9X3JwB3ZObazHwWWAwcUqv6JElS99XlGoeIGAmMAx4AhmTmCqiEC2DvarN9gGUddlteXbflZ02JiHkRMa+9vb2mdUuSpM3VPDhExCDgh8C5mblqe007WZdbrciclpkTMnPC4MGDe6tMSZJUoKbBISIGUgkNt2bmj6qrX4yIodXtQ4GV1fXLgbYOuw8DXqhlfZIkqXtqeVdFANcDT2Tm1A6bZgCnVt+fCtzdYf3kiHhzRIwCRgMP1qo+SZLUfTvW8LMnAp8CHouI+dV1FwKXA9Mj4jPAUuBjAJm5MCKmA49TuSPjzMxcX8P6JElSN9UsOGTmz+j8ugWAo7axz6XApbWqSZIkvTHOHClJkooZHDrRNnwEEVH8ahs+otElS5JUF7W8xqHPWr5sKVNnLSpuf94x+9WwGkmSmocjDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOAgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOAgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOAgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUzOCgbhkQEBHFr5HDhzW6ZElSL9qx0QWob1mfkLMvK24fR15Yw2okSfXmiIMkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHSZJUrMvgEBH/u2SdJElqfSUjDmM6LkTEAODg2pQjSZKa2TaDQ0RcEBGrgQMjYlX1tRpYCdxdtwolSVLT2GZwyMz/kZm7AVdm5luqr90y822ZeUEda5QkSU2iyymnM/OCiNgHGNGxfWbOrWVhkiSp+XQZHCLicmAy8Diwvro6AYODJEn9TMlDrk4E9svMtbUuRpIkNbeSuyqeAQbWuhBJktT8SoLDK8D8iPhfEXH1xldXO0XEDRGxMiIWdFh3cUQ8HxHzq68PdNh2QUQsjohFEXFsz7ojSZJqqeRUxYzqq7tuAq4Bbtli/Tcz86qOKyJifyrXUYwB/gT4aUS8IzPXI0mSmkbJXRU39+SDM3NuRIwsbH4CcEf1OopnI2IxcAhwf0++W5Ik1UbJlNPPRsQzW77ewHeeFRG/rp7K2LO6bh9gWYc2y6vrOqtnSkTMi4h57e3tb6AMSZLUXSXXOEwA/rz6eg9wNfC9Hn7ftcC+wFhgBfCN6vropG129gGZOS0zJ2TmhMGDB/ewDEmS1BNdBofM/G2H1/OZ+S3gyJ58WWa+mJnrM3MD8F0qpyOgMsLQ1qHpMOCFnnyHJEmqnZIJoMZ3WNyBygjEbj35sogYmpkrqosnAhvvuJgB3BYRU6lcHDkaeLAn3yFJkmqn5K6Kb3R4vw5YAny8q50i4nbg/cBeEbEc+Crw/ogYS+U0xBLgcwCZuTAiplOZnXIdcKZ3VEiS1HxK7qr4y558cGae0snq67fT/lLg0p58lyRJqo+Suyp2j4ipG+9kiIhvRMTu9ShOkiQ1l5K7Km4AVlM5PfFxYBVwYy2LkiRJzankGod9M/OjHZYviYj5NapHkiQ1sZIRh1cj4oiNCxExEXi1diVJkqRmVTLicAZwc4frGn4PnFaziiRJUtMquatiPnBQRLyluryq1kVJkqTmVHJXxWURsUdmrsrMVRGxZ0R8rR7FSZKk5lJyjcNfZ+YfNi5k5u+BD9SsIkmS1LRKgsOAiHjzxoWI2Bl483baS5KkFlVyceT3gHsj4kYqU0WfDtxc06okSVJTKrk48oqI+DXwV1Qef/1PmfkfNa9MkiQ1nZIRBzJzJjCzxrVIkqQmV3KNgyRJEmBwkCRJ3VAyj8PxEWHAkCRJRSMOk4GnIuKKiHhXrQuSJEnNq8vgkJmfBMYBTwM3RsT9ETElInareXWSJKmpFJ2CqD6f4ofAHcBQ4ETgkYj4Qg1rkyRJTabkGocPRsRdwGxgIHBIZv41cBDw32pcnyRJaiIl8zh8DPhmZs7tuDIzX4mI02tTliRJakYlM0f+7Xa23du75UiSpGZWcqrisIh4KCLWRMRrEbE+IlbVozhJktRcSi6OvAY4BXgK2Bn4O+Cfa1mUJElqTqXPqlgcEQMycz2VWzJ/UeO6JElSEyoZcXglIt4EzK9OAvVFYNca19XS2oaPICKKX23DRzS6ZEmSgLIRh09RCRhnAV8E2oCP1rKoVrd82VKmzlpU3P68Y/arYTWSJJUruaviuYgYXH1/Se1LkiRJzWqbpyqi4uKIeAl4EvhNRLRHxFfqV54kSWom27vG4VxgIvDnmfm2zNwTOBSYWL3OQZIk9TPbCw5/C5ySmc9uXJGZzwCfrG6TJEn9zPaCw8DMfGnLlZnZTuWZFZIkqZ/ZXnB4rYfbJElSi9reXRUHbWNq6QB2qlE9kiSpiW0zOGTmgHoWIkmSml/JzJGSJEmAwUGSJHWDwUGSJBUzOEiSpGIGB0mSVMzgIEmSihkcJElSMYODJEkqZnCQJEnFDA6SJKmYwUGSJBUzOEiSpGI1Cw4RcUNErIyIBR3WvTUi7omIp6p/7tlh2wURsTgiFkXEsbWqS5Ik9VwtRxxuAiZtse584N7MHA3cW10mIvYHJgNjqvt8JyJ8OqckSU2mZsEhM+cCv9ti9QnAzdX3NwMf7rD+jsxcm5nPAouBQ2pVmyRJ6pl6X+MwJDNXAFT/3Lu6fh9gWYd2y6vrthIRUyJiXkTMa29vr2mxkiRpc81ycWR0si47a5iZ0zJzQmZOGDx4cI3LkqTWNmjQoEaXoD6m3sHhxYgYClD9c2V1/XKgrUO7YcALda5NkiR1od7BYQZwavX9qcDdHdZPjog3R8QoYDTwYJ1rk6Q+78tf/jLf+c53Ni1ffPHFXHLJJRx11FGMHz+eAw44gLvvvnur/e677z6OP/74TctnnXUWN910EwAPP/ww73vf+zj44IM59thjWbFiRc37oeZVy9sxbwfuB/aLiOUR8RngcuDoiHgKOLq6TGYuBKYDjwMzgTMzc32tapOkVjV58mS+//3vb1qePn06n/70p7nrrrt45JFHmDNnDl/60pfI7PRs8FZef/11vvCFL3DnnXfy8MMPc/rpp3PRRRfVqnz1ATvW6oMz85RtbDpqG+0vBS6tVT2S1B+MGzeOlStX8sILL9De3s6ee+7J0KFD+eIXv8jcuXPZYYcdeP7553nxxRd5+9vf3uXnLVq0iAULFnD00UcDsH79eoYOHVrrbqiJ1Sw4SJIa46STTuLOO+/kP//zP5k8eTK33nor7e3tPPzwwwwcOJCRI0fyxz/+cbN9dtxxRzZs2LBpeeP2zGTMmDHcf//9de2Dmlez3FUhSeolkydP5o477uDOO+/kpJNO4uWXX2bvvfdm4MCBzJkzh+eee26rfUaMGMHjjz/O2rVrefnll7n33nsB2G+//Whvb98UHF5//XUWLlxY1/6ouTjiIEktZsyYMaxevZp99tmHoUOH8olPfIIPfvCDTJgwgbFjx/LOd75zq33a2tr4+Mc/zoEHHsjo0aMZN24cAG9605u48847Ofvss3n55ZdZt24d5557LmPGjKl3t9QkDA6S1IIee+yxTe/32muvbZ5qWLNmzab3V1xxBVdcccVWbcaOHcvcuXN7v0j1SZ6qkCRJxQwOvSF2ICKKX/3JgKBbfzcjhw9rdMmSpO3wVEVvyA1MnbWouPl5x+zX7a/oq4FjfULOvqy4fRx5YQ2rkSS9UQaHPqL0h68/eCVJteSpCkmSVMzgIEmSihkcJKlF/eEPf9jsgVdSbzA4SFKdtA0f0a27jLp6tQ0fsd3v21ZwWL/eZwiq57w4UpLqZPmypd26A6srXd2hdf755/P0008zduxYBg4cyKBBgxg6dCjz58/nJz/5CccffzwLFiwA4KqrrmLNmjVcfPHFPP3005x55pm0t7ezyy678N3vfrfT2SbVPxkcJKlFXX755SxYsID58+dz3333cdxxx7FgwQJGjRrFkiVLtrnflClTuO666xg9ejQPPPAAf//3f8/s2bPrV7iamsFBkvqJQw45hFGjRm23zZo1a/jFL37Bxz72sU3r1q5dW+vS1IcYHCSpn9h11103vd/WY7Q3bNjAHnvswfz58+tdnvoIL46UpBa12267sXr16k63DRkyhJUrV/Lb3/6WtWvX8uMf/xiAt7zlLYwaNYof/OAHAGQmjz76aN1qVvNzxEGSWtTb3vY2Jk6cyLvf/W523nlnhgwZsmnbwIED+cpXvsKhhx7KqFGjNrv48dZbb+WMM87ga1/7Gq+//jqTJ0/moIMOakQX1IQMDpJUJ8PahvfoWTXb+7yu3HbbbdvcdvbZZ3P22WdvtX7UqFHMnDnzDdWm1mVwkKQ6Wbb0uUaXIL1hXuMgSZKKGRwkSVIxg4MkSSpmcJAkScUMDpIkqZjBQZJU5L777uP4448HYMaMGVx++eXbbNvTR3pffPHFXHXVVT2uUbVncJCkOhk5fFivPlZ75PBhvVJXTx6z/aEPfYjzzz9/m9t7GhzU/JzHQZLq5Lllz5OzL+u1z4sjL+yyzZIlS5g0aRKHHnoov/rVr3jHO97BLbfcwv7778/pp5/OrFmzOOuss3jrW9/KV7/6VdauXcu+++7LjTfeyKBBg5g5cybnnnsue+21F+PHj9/0uTfddBPz5s3jmmuu4cUXX+Tzn/88zzzzDADXXnstV1999aZHeh999NFceeWVXHnllUyfPp21a9dy4okncskllwBw6aWXcsstt9DW1sbgwYM5+OCDe+3vSL3P4CBJLW7RokVcf/31TJw4kdNPP33TSMBOO+3Ez372M1566SU+8pGP8NOf/pRdd92Vr3/960ydOpV//Md/5LOf/SyzZ8/mz/7szzj55JM7/fyzzz6b973vfdx1112sX7+eNWvWbPZIb4BZs2bx1FNP8eCDD5KZfOhDH2Lu3Lnsuuuu3HHHHfzqV79i3bp1jB8/3uDQ5AwOktTi2tramDhxIgCf/OQnufrqqwE2BYFf/vKXPP7445vavPbaaxx++OE8+eSTjBo1itGjR2/ad9q0aVt9/uzZs7nlllsAGDBgALvvvju///3vN2sza9YsZs2axbhx44DK47ufeuopVq9ezYknnsguu+wCVE6BqLkZHCSpxUVEp8sbH7OdmRx99NHcfvvtm7WbP3/+Vvv2VGZywQUX8LnPfW6z9d/61rd67TtUH14cKUktbunSpdx///0A3H777RxxxBGbbT/ssMP4+c9/zuLFiwF45ZVX+M1vfsM73/lOnn32WZ5++ulN+3bmqKOO4tprrwUqF1quWrVqq0d6H3vssdxwww2sWbMGgOeff56VK1fy3ve+l7vuuotXX32V1atX82//9m+923n1OoODJLW4d73rXdx8880ceOCB/O53v+OMM87YbPvgwYO56aabOOWUUzjwwAM57LDDePLJJ9lpp52YNm0axx13HEcccQQjRozo9PO//e1vM2fOHA444AAOPvhgFi5cuNkjvf/hH/6BY445hr/5m7/h8MMP54ADDuCkk05i9erVjB8/npNPPpmxY8fy0Y9+lPe85z31+CvRG+CpCkmqkxFt+xTdCdGdzyuxww47cN111222bsmSJZstH3nkkTz00ENb7Ttp0iSefPLJrdafdtppnHbaaQAMGTKEu+++e6s2Wz7S+5xzzuGcc87Zqt1FF13ERRdd1FU31CQMDpJUJ0uWLm90CdIb5qkKSWphI0eOZMGCBY0uQy3EEYf+Lnbo3tBpmDUlqT8zOLSa7gYBYOq064vbnjflM92tSJLUQgwOrSY3GAQkSTXjuLMkSSrmiENf0IPTD5Ik1YLBoS/oxukHTz1IkmrJUxWSJKmYwUGSJBUzOEiSpGIGB0mSVMzgIEmSijXkroqIWAKsBtYD6zJzQkS8Ffg+MBJYAnw8M3/fiPokSVLnGnk75l9m5ksdls8H7s3MyyPi/OrylxtTmnqNz8KQpJbSTPM4nAC8v/r+ZuA+DA59n1NgS1JLadSvdwnMioiHI2JKdd2QzFwBUP1z7852jIgpETEvIua1t7fXqVxJkgSNG3GYmJkvRMTewD0R8WTpjpk5DZgGMGHChKxVgZIkaWsNGXHIzBeqf64E7gIOAV6MiKEA1T9XNqI2SZK0bXUPDhGxa0TstvE9cAywAJgBnFptdipwd71rkyRJ29eIUxVDgLsiYuP335aZMyPiIWB6RHwGWAp8rAG1SZKk7ah7cMjMZ4CDOln/W+CoetcjSZLKNdPtmOoLujsvgySppRgc1D3OyyBJ/ZrT9EmSpGIGB0mSVMzgIEmSihkcJElSMYODJEkqZnBQ04mI4tfI4cMaXa4k9Svejqmmk7MvK27rnBKSVF+OOEiSpGIGB0mSVMzgIEmSihkcJElSMYOD+pW24SO6dddG2/ARjS5ZkpqKd1WoX1m+bClTZy0qbn/eMfvVsBpJ6nsccZAkScUMDpIkqZjBQZIkFTM4SJKkYl4cqeYSO3RvGukw+0pSPRkc1FxyA1OnXV/c/Lwpn6lhMZKkLfnrmiRJKmZwkHqRE0xJanWeqlDfFjsQEY2uYhMnmJLU6gwO6ttygz+oJamOPFUhSZKKGRwkSVIxg4MkSSpmcJAkScW8OFLania7a0OSGs3gIG2Pd21I0mY8VSFJkooZHCRJUjGDgyRJKuY1DurzLrnkkkaXIEn9hsFBfd5XTz2quO15P7+thpVIUuvzVIUkSSpmcJAkScUMDpKaUtvwEURE8att+IhGlyz1C17jIKkpLV+21Mm3pCbkiIPUSNUprf2tuhf4dynVhSMOUiPVeErrtuEjWL5saXH7YW3DWbb0uYZ/do84PbhUFwYH9Tv9ad6HWg73d/uzj31Xcz0wrJsPMKt58JH6CIOD+p0+Pe9DX35aZ7ONCDRbPVIfYXBokP70W696kT/sJDWYwaFB+vRvvZIarumuMVG/YXDYBkcE1C/15VMh/Yy3q6pRmi44RMQk4NvAAOBfMvPyRtThiID6pW6cCvEHkdQ/NVVwiIgBwP8EjgaWAw9FxIzMfLyxlXXNEQqpxXVzNGbAjgNZv+714vZ9/VRCd06ddPfvpr/9XTa7pgoOwCHA4sx8BiAi7gBOAJo+ODhCoZ4ydPYRPbgwtaluV61x8AG6NVpV07/LJprvpCearZ4tRWbW7cu6EhEnAZMy8++qy58CDs3Mszq0mQJMqS7uB5T/1/TG7AW8VKfvagb9qb/9qa/Qv/rbn/oK9reV1buvL2XmpM42NNuIQ2dxeLNkk5nTgGn1Kee/RMS8zJxQ7+9tlP7U3/7UV+hf/e1PfQX728qaqa/N9qyK5UBbh+VhwAsNqkWSJG2h2YLDQ8DoiBgVEW8CJgMzGlyTJEmqaqpTFZm5LiLOAv6Dyu2YN2TmwgaXtVHdT480WH/qb3/qK/Sv/vanvoL9bWVN09emujhSkiQ1t2Y7VSFJkpqYwUGSJBUzOGwhIiZFxKKIWBwR53eyPSLi6ur2X0fE+EbU2RsK+vr+iHg5IuZXX19pRJ29ISJuiIiVEbFgG9tb5rhCUX9b6di2RcSciHgiIhZGxDmdtGmZ41vY35Y4vhGxU0Q8GBGPVvu61WxpLXZsS/rb+GObmb6qLyoXZD4N/CnwJuBRYP8t2nwA+Hcqc04cBjzQ6Lpr2Nf3Az9udK291N/3AuOBBdvY3hLHtRv9baVjOxQYX32/G/CbVv3/thv9bYnjWz1eg6rvBwIPAIe18LEt6W/Dj60jDpvbNOV1Zr4GbJzyuqMTgFuy4pfAHhExtN6F9oKSvraMzJwL/G47TVrluAJF/W0ZmbkiMx+pvl8NPAHss0Wzljm+hf1tCdXjtaa6OLD62vKK/lY6tiX9bTiDw+b2AZZ1WF7O1v9DlrTpC0r7cXh12OzfI2JMfUpriFY5rt3Rcsc2IkYC46j8ptZRSx7f7fQXWuT4RsSAiJgPrATuycyWPrYF/YUGH1uDw+a6nPK6sE1fUNKPR4ARmXkQ8M/Av9a6qAZqleNaquWObUQMAn4InJuZq7bc3Mkuffr4dtHfljm+mbk+M8dSmUn4kIh49xZNWurYFvS34cfW4LC5kimvW2Va7C77kZmrNg6bZeZPgIERsVf9SqyrVjmuRVrt2EbEQCo/RG/NzB910qSljm9X/W214wuQmX8A7gO2fPBSSx3bjbbV32Y4tgaHzZVMeT0D+NvqlbyHAS9n5op6F9oLuuxrRLw9ovIc3og4hMp/L7+te6X10SrHtUgrHdtqP64HnsjMqdto1jLHt6S/rXJ8I2JwROxRfb8z8FfAk1s0a6Vj22V/m+HYNtWU042W25jyOiI+X91+HfATKlfxLgZeAT7dqHrfiMK+ngScERHrgFeByVm9rLeviYjbqVyNvFdELAe+SuXCo5Y6rhsV9Ldlji0wEfgU8Fj13DDAhcBwaMnjW9LfVjm+Q4GbI2IAlR+Q0zPzx634b3JVSX8bfmydclqSJBXzVIUkSSpmcJAkScUMDpIkqZjBQZIkFTM4SJKkYgYHqR+LiPXVJ+wtrE5he15EbPffhYj4k4i4s/r+/RHx4/pUK6kZOI+D1L+9Wp3elojYG7gN2J3KvA+dyswXqNxLLqkfcsRBEgCZuRKYApxVnYVvZET834h4pPr6C6g8WCkiFnTcNyJ2iIinImJwh+XFW06FGxEXR8TNETErIpZExEci4oqIeCwiZlanUiYiDo6I/xMRD0fEf0T1aYcR8dmIeKg6OvLDiNiluv6miLg6In4REc9EhMFGqhGDg6RNMvMZKv8u7E3l6XxHZ+Z44GTg6u3stwH4HvCJ6qq/Ah7NzJc6ab4vcByVxyF/D5iTmQdQmQXvuGp4+GfgpMw8GLgBuLS6748y88+rD/h5AvhMh88dChwBHA9c3t2+SyrjqQpJW9r4tMGBwDURMRZYD7yji/1uAO4GvgWcDty4jXb/npmvR8RjVKY7n1ld/xgwEtgPeDdwT3VK/gHAxmcPvDsivgbsAQyiMmX6Rv9aDTCPR8SQrjopqWcMDpI2iYg/pRISVlK5zuFF4CAqoxB/3N6+mbksIl6MiCOBQ/mv0Yctra223xARr3eYZ38DlX+TAliYmYd3su9NwIcz89GIOI3K8zg2+9yNXdlerZJ6zlMVkoDKk/mA64Brqj/MdwdWVH+L/xSV3/y78i9UTj9Mz8z1PSxlETA4Ig6v1jUwIsZUt+0GrKiezthWMJFUQwYHqX/beePtmMBPgVnAJdVt3wFOjYhfUjlN8f8KPm8GlVMI2zpN0aXMfI3KXRtfj4hHgfnAX1Q3/3fgAeAetn68sqQ68OmYknpNREwAvpmZ72l0LZJqw2scJPWKiDgfOANPIUgtzREHSZJUzGscJElSMYODJEkqZnCQJEnFDA6SJKmYwUGSJBX7/0pUGCSk+TQMAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "g1 = sns.displot(NA_long, x=\"NA\", hue=\"value\", binwidth=0.1, height=5, aspect=1.5)\n", + "sns.move_legend(g1, \"upper left\", bbox_to_anchor=(.55, .45))\n", + "g1.set_axis_labels(\"Daily mean\", \"Day count\")\n", + "\n", + "display(g1)\n", + "g1.savefig(\"prox_comm_PANAS_predictions.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7e84c79b-321a-4e8f-a795-515fafe169a4", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e1579333-b57b-4bce-9c86-f77c0cd0d3d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.42725018860641295" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_absolute_error(try_y[\"NA_true\"], try_y[\"NA_predicted\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "5f633f76-999a-436f-afc3-f3cc44061e5a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.43618444, 0.39780929, 0.56970789, 0.38847095, 0.78244762,\n", + " 0.29847855, 0.4348883 , 1.80633684, 0.29097794, 0.53106755,\n", + " 0.32757327, 0.37845186, 0.30228743, 0.34129752, 0.2555845 ,\n", + " 1.27981007, 0.40270591, 0.35411635, 0.2568122 , 0.5820276 ,\n", + " 0.33293713, 0.47789249, 0.19690204, 0.68629304, 0.67457704,\n", + " 13.0369228 , 0.41234072, 0.31384332, 0.45126702, 0.34806906,\n", + " 0.52854722, 0.28707449, 0.28282637, 0.49286602, 0.26406791,\n", + " 0.39567315, 0.33661383, 1.23764371, 0.43788937, 0.32592072,\n", + " 0.47443271, 0.55999948, 0.50408039, 0.40523803, 0.50241167,\n", + " 0.30617356, 0.31461521, 0.28494495, 0.32278505, 0.29084659,\n", + " 0.47211231, 0.33807521, 0.34608592, 0.40624902, 0.22882316,\n", + " 0.45563856])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_loso_mean_absolute_error" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "straw2analysis", + "language": "python", + "name": "straw2analysis" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/statistical_analysis/adherence.py b/statistical_analysis/adherence.py index 2477355..2589fd8 100644 --- a/statistical_analysis/adherence.py +++ b/statistical_analysis/adherence.py @@ -96,13 +96,23 @@ df_session_counts_time = classify_sessions_by_completion_time(df_esm_preprocesse # Sessions are now classified according to the type of a session (a true questionnaire or simple single questions) and users response. # %% -df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response"].astype("category") -df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.remove_categories(['during_work_first', 'ema_unanswered', 'evening_first', 'morning', 'morning_first']) -df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.add_categories("interrupted") -df_session_counts_time.loc[df_session_counts_time["session_response_cat"].isna(), "session_response_cat"] = "interrupted" -#df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({ -# "ema_unanswered": "interrupted", -# "morning_first": "interrupted", +df_session_counts_time["session_response_cat"] = df_session_counts_time[ + "session_response" +].astype("category") +df_session_counts_time["session_response_cat"] = df_session_counts_time[ + "session_response_cat" +].cat.remove_categories( + ["during_work_first", "ema_unanswered", "evening_first", "morning", "morning_first"] +) +df_session_counts_time["session_response_cat"] = df_session_counts_time[ + "session_response_cat" +].cat.add_categories("interrupted") +df_session_counts_time.loc[ + df_session_counts_time["session_response_cat"].isna(), "session_response_cat" +] = "interrupted" +# df_session_counts_time["session_response_cat"] = df_session_counts_time["session_response_cat"].cat.rename_categories({ +# "ema_unanswered": "interrupted", +# "morning_first": "interrupted", # "evening_first": "interrupted", # "morning": "interrupted", # "during_work_first": "interrupted"})