Add Seaborn and clean up.

2021-04-09 15:33:52 +02:00 · 2021-04-09 15:33:52 +02:00 · 66b36faedc
parent e6d129c6ee
commit 66b36faedc
6 changed files with 34 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 /.env
 */.ipynb_checkpoints/
+__pycache__/
+*/__pycache__/
--- a/.idea/straw2analysis.iml
+++ b/.idea/straw2analysis.iml
@ -3,6 +3,7 @@
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/config/.ipynb_checkpoints" />
+      <excludeFolder url="file://$MODULE_DIR$/exploration/.ipynb_checkpoints" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.9 (straw2analysis)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
--- a/config/environment.yml
+++ b/config/environment.yml
@ -12,4 +12,5 @@ dependencies:
  - pandas
  - psycopg2
  - python-dotenv
+  - seaborn
  - sqlalchemy
--- a/exploration/communication.ipynb
+++ b/exploration/communication.ipynb
@ -5,6 +5,15 @@
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
+   "source": [
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import os, sys\n",
    "nb_dir = os.path.split(os.getcwd())[0]\n",
@ -14,16 +23,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from features.communication import *"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Example of feature calculation"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
@ -72,7 +88,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@ -134,7 +150,7 @@
       "21                              0  "
      ]
     },
-     "execution_count": 4,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -145,8 +161,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [
    {
     "data": {
@ -194,7 +212,7 @@
       "21                       16        2"
      ]
     },
-     "execution_count": 5,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
--- a/features/communication.py
+++ b/features/communication.py
@ -100,7 +100,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
    -------
    comm_features: pd.DataFrame
        A list of communication features for every participant.
-
+        These are:
+        * the number of messages by type (received, sent),
+        * the number of calls by type (incoming, outgoing, missed), and
+        * the duration of calls by type.
    """
    if "call_type" in comm_df:
        comm_counts = (
--- a/participants/query_db.py
+++ b/participants/query_db.py
@ -24,4 +24,4 @@ def get_usernames(
    )
    with db_engine.connect() as connection:
        df_participants = pd.read_sql(query_participant_usernames.statement, connection)
-    return df_participants
+    return df_participants.values.flatten()