diff --git a/exploration/communication.ipynb b/exploration/communication.ipynb
new file mode 100644
index 0000000..f77b1a1
--- /dev/null
+++ b/exploration/communication.ipynb
@@ -0,0 +1,229 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os, sys\n",
+ "nb_dir = os.path.split(os.getcwd())[0]\n",
+ "if nb_dir not in sys.path:\n",
+ " sys.path.append(nb_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from features.communication import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " id _id timestamp device_id call_type \\\n",
+ "0 1649 2 1603359870948 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n",
+ "1 1648 1 1603359849077 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n",
+ "2 1647 1 1603358854783 049df3f8-8541-4cf5-af2b-83f6b3f0cf4b 2 \n",
+ "3 1267 5 1599242289282 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n",
+ "4 1266 4 1599242131166 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n",
+ "5 794 3 1588053846893 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n",
+ "6 744 2 1587137920351 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n",
+ "7 616 1 1585919254218 d2a71262-b2cf-484b-b422-ec2a84eebd3d 1 \n",
+ "8 556 1 1585043148221 d5fb52e1-7df8-44b5-a805-8d04ca008061 1 \n",
+ "\n",
+ " call_duration trace participant_id \\\n",
+ "0 0 040519011 21 \n",
+ "1 0 +38640519011 21 \n",
+ "2 0 72441dc0eb9550fcdc5a61cce9dc8bd302494680 21 \n",
+ "3 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n",
+ "4 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n",
+ "5 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
+ "6 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
+ "7 29 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
+ "8 17 501cef50691bcc4f0ddc4bb5d6daa07154189d47 21 \n",
+ "\n",
+ " username \n",
+ "0 nokia_0000003 \n",
+ "1 nokia_0000003 \n",
+ "2 nokia_0000003 \n",
+ "3 nokia_0000003 \n",
+ "4 nokia_0000003 \n",
+ "5 nokia_0000003 \n",
+ "6 nokia_0000003 \n",
+ "7 nokia_0000003 \n",
+ "8 nokia_0000003 \n"
+ ]
+ }
+ ],
+ "source": [
+ "df_calls = get_call_data([\"nokia_0000003\"])\n",
+ "print(df_calls)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " call_type | \n",
+ " no_incoming | \n",
+ " no_outgoing | \n",
+ " no_missed | \n",
+ " duration_incoming | \n",
+ " duration_outgoing | \n",
+ "
\n",
+ " \n",
+ " participant_id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 21 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 46 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "call_type no_incoming no_outgoing no_missed duration_incoming \\\n",
+ "participant_id \n",
+ "21 2 5 2 46 \n",
+ "\n",
+ "call_type duration_outgoing \n",
+ "participant_id \n",
+ "21 0 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "count_comms(df_calls)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " message_type | \n",
+ " no_received | \n",
+ " no_sent | \n",
+ "
\n",
+ " \n",
+ " participant_id | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 21 | \n",
+ " 16 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "message_type no_received no_sent\n",
+ "participant_id \n",
+ "21 16 2"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_sms = get_sms_data([\"nokia_0000003\"])\n",
+ "count_comms(df_sms)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "straw2analysis",
+ "language": "python",
+ "name": "straw2analysis"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/features/communication.py b/features/communication.py
index d5aebcf..66dfa07 100644
--- a/features/communication.py
+++ b/features/communication.py
@@ -2,7 +2,7 @@ from typing import List
import pandas as pd
-from config.models import Call, Participant
+from config.models import Call, Participant, SMS
from setup import db_engine, session
call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
@@ -33,6 +33,30 @@ def get_call_data(usernames: List) -> pd.DataFrame:
return df_calls
+def get_sms_data(usernames: List) -> pd.DataFrame:
+ """
+ Read the data from the sms table and return it in a dataframe.
+
+ Parameters
+ ----------
+ usernames: List
+ A list of usernames to put into the WHERE condition.
+
+ Returns
+ -------
+ df_sms: pd.DataFrame
+ A dataframe of SMS data together with the participant's username.
+ """
+ query_sms = (
+ session.query(SMS, Participant.username)
+ .filter(Participant.id == SMS.participant_id)
+ .filter(Participant.username.in_(usernames))
+ )
+ with db_engine.connect() as connection:
+ df_sms = pd.read_sql(query_sms.statement, connection)
+ return df_sms
+
+
def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
"""
Count contacts (callers, senders) and enumerate them by their frequency.