From 66b36faedc859ace686ea22d83af4497a2ca4a61 Mon Sep 17 00:00:00 2001 From: junos Date: Fri, 9 Apr 2021 15:33:52 +0200 Subject: [PATCH] Add Seaborn and cleanup. --- .gitignore | 2 ++ .idea/straw2analysis.iml | 1 + config/environment.yml | 1 + exploration/communication.ipynb | 32 +++++++++++++++++++++++++------- features/communication.py | 5 ++++- participants/query_db.py | 2 +- 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index b6abfca..dc16781 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /.env */.ipynb_checkpoints/ +__pycache__/ +*/__pycache__/ diff --git a/.idea/straw2analysis.iml b/.idea/straw2analysis.iml index 89e730b..e7a4a3d 100644 --- a/.idea/straw2analysis.iml +++ b/.idea/straw2analysis.iml @@ -3,6 +3,7 @@ + diff --git a/config/environment.yml b/config/environment.yml index 7f1d829..4ff921d 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -12,4 +12,5 @@ dependencies: - pandas - psycopg2 - python-dotenv + - seaborn - sqlalchemy \ No newline at end of file diff --git a/exploration/communication.ipynb b/exploration/communication.ipynb index f77b1a1..5fc0072 100644 --- a/exploration/communication.ipynb +++ b/exploration/communication.ipynb @@ -5,6 +5,15 @@ "execution_count": 1, "metadata": {}, "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], "source": [ "import os, sys\n", "nb_dir = os.path.split(os.getcwd())[0]\n", @@ -14,16 +23,23 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from features.communication import *" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of feature calculation" + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -72,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 
5, "metadata": {}, "outputs": [ { @@ -134,7 +150,7 @@ "21 0 " ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -145,8 +161,10 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 6, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -194,7 +212,7 @@ "21 16 2" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } diff --git a/features/communication.py b/features/communication.py index cee9b9e..8b9b5d1 100644 --- a/features/communication.py +++ b/features/communication.py @@ -100,7 +100,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame: ------- comm_features: pd.DataFrame A list of communication features for every participant. - + These are: + * the number of messages by type (received, sent), + * the number of calls by type (incoming, outgoing, missed), and + * the duration of calls by type. """ if "call_type" in comm_df: comm_counts = ( diff --git a/participants/query_db.py b/participants/query_db.py index 087d185..aaa3976 100644 --- a/participants/query_db.py +++ b/participants/query_db.py @@ -24,4 +24,4 @@ def get_usernames( ) with db_engine.connect() as connection: df_participants = pd.read_sql(query_participant_usernames.statement, connection) - return df_participants + return df_participants.values.flatten()