From 66b36faedc859ace686ea22d83af4497a2ca4a61 Mon Sep 17 00:00:00 2001
From: junos <junos.lukan@ijs.si>
Date: Fri, 9 Apr 2021 15:33:52 +0200
Subject: [PATCH] Add Seaborn and cleanup.

---
 .gitignore                      |  2 ++
 .idea/straw2analysis.iml        |  1 +
 config/environment.yml          |  1 +
 exploration/communication.ipynb | 32 +++++++++++++++++++++++++-------
 features/communication.py       |  5 ++++-
 participants/query_db.py        |  2 +-
 6 files changed, 34 insertions(+), 9 deletions(-)
diff --git a/.gitignore b/.gitignore
index b6abfca..dc16781 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 /.env
 */.ipynb_checkpoints/
+__pycache__/
+*/__pycache__/
diff --git a/.idea/straw2analysis.iml b/.idea/straw2analysis.iml
index 89e730b..e7a4a3d 100644
--- a/.idea/straw2analysis.iml
+++ b/.idea/straw2analysis.iml
@@ -3,6 +3,7 @@
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$">
       <excludeFolder url="file://$MODULE_DIR$/config/.ipynb_checkpoints" />
+      <excludeFolder url="file://$MODULE_DIR$/exploration/.ipynb_checkpoints" />
     </content>
     <orderEntry type="jdk" jdkName="Python 3.9 (straw2analysis)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
diff --git a/config/environment.yml b/config/environment.yml
index 7f1d829..4ff921d 100644
--- a/config/environment.yml
+++ b/config/environment.yml
@@ -12,4 +12,5 @@ dependencies:
   - pandas
   - psycopg2
   - python-dotenv
+  - seaborn
   - sqlalchemy
\ No newline at end of file
diff --git a/exploration/communication.ipynb b/exploration/communication.ipynb
index f77b1a1..5fc0072 100644
--- a/exploration/communication.ipynb
+++ b/exploration/communication.ipynb
@@ -5,6 +5,15 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import os, sys\n",
     "nb_dir = os.path.split(os.getcwd())[0]\n",
@@ -14,16 +23,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "from features.communication import *"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Example of feature calculation"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -72,7 +88,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -134,7 +150,7 @@
        "21                              0  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -145,8 +161,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "data": {
@@ -194,7 +212,7 @@
        "21                       16        2"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/features/communication.py b/features/communication.py
index cee9b9e..8b9b5d1 100644
--- a/features/communication.py
+++ b/features/communication.py
@@ -100,7 +100,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
     -------
     comm_features: pd.DataFrame
         A list of communication features for every participant.
-
+        These are:
+        * the number of messages by type (received, sent),
+        * the number of calls by type (incoming, outgoing, missed), and
+        * the duration of calls by type.
     """
     if "call_type" in comm_df:
         comm_counts = (
diff --git a/participants/query_db.py b/participants/query_db.py
index 087d185..aaa3976 100644
--- a/participants/query_db.py
+++ b/participants/query_db.py
@@ -24,4 +24,4 @@ def get_usernames(
     )
     with db_engine.connect() as connection:
         df_participants = pd.read_sql(query_participant_usernames.statement, connection)
-    return df_participants
+    return df_participants.values.flatten()