From 9b495e63a366497a18e5d376021059a6a3d3570a Mon Sep 17 00:00:00 2001 From: junos Date: Tue, 6 Apr 2021 16:50:40 +0200 Subject: [PATCH] Add a function to get sms data and illustrate in Jupyter notebook. --- exploration/communication.ipynb | 229 ++++++++++++++++++++++++++++++++ features/communication.py | 26 +++- 2 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 exploration/communication.ipynb diff --git a/exploration/communication.ipynb b/exploration/communication.ipynb new file mode 100644 index 0000000..f77b1a1 --- /dev/null +++ b/exploration/communication.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "nb_dir = os.path.split(os.getcwd())[0]\n", + "if nb_dir not in sys.path:\n", + " sys.path.append(nb_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from features.communication import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id _id timestamp device_id call_type \\\n", + "0 1649 2 1603359870948 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n", + "1 1648 1 1603359849077 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n", + "2 1647 1 1603358854783 049df3f8-8541-4cf5-af2b-83f6b3f0cf4b 2 \n", + "3 1267 5 1599242289282 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n", + "4 1266 4 1599242131166 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n", + "5 794 3 1588053846893 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n", + "6 744 2 1587137920351 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n", + "7 616 1 1585919254218 d2a71262-b2cf-484b-b422-ec2a84eebd3d 1 \n", + "8 556 1 1585043148221 d5fb52e1-7df8-44b5-a805-8d04ca008061 1 \n", + "\n", + " call_duration trace participant_id \\\n", + "0 0 040519011 21 \n", + "1 0 +38640519011 21 \n", + "2 0 72441dc0eb9550fcdc5a61cce9dc8bd302494680 21 \n", 
+ "3 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n", + "4 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n", + "5 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n", + "6 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n", + "7 29 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n", + "8 17 501cef50691bcc4f0ddc4bb5d6daa07154189d47 21 \n", + "\n", + " username \n", + "0 nokia_0000003 \n", + "1 nokia_0000003 \n", + "2 nokia_0000003 \n", + "3 nokia_0000003 \n", + "4 nokia_0000003 \n", + "5 nokia_0000003 \n", + "6 nokia_0000003 \n", + "7 nokia_0000003 \n", + "8 nokia_0000003 \n" + ] + } + ], + "source": [ + "df_calls = get_call_data([\"nokia_0000003\"])\n", + "print(df_calls)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
call_typeno_incomingno_outgoingno_missedduration_incomingduration_outgoing
participant_id
21252460
\n", + "
" + ], + "text/plain": [ + "call_type no_incoming no_outgoing no_missed duration_incoming \\\n", + "participant_id \n", + "21 2 5 2 46 \n", + "\n", + "call_type duration_outgoing \n", + "participant_id \n", + "21 0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "count_comms(df_calls)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
message_typeno_receivedno_sent
participant_id
21162
\n", + "
" + ], + "text/plain": [ + "message_type no_received no_sent\n", + "participant_id \n", + "21 16 2" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_sms = get_sms_data([\"nokia_0000003\"])\n", + "count_comms(df_sms)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "straw2analysis", + "language": "python", + "name": "straw2analysis" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/features/communication.py b/features/communication.py index d5aebcf..66dfa07 100644 --- a/features/communication.py +++ b/features/communication.py @@ -2,7 +2,7 @@ from typing import List import pandas as pd -from config.models import Call, Participant +from config.models import Call, Participant, SMS from setup import db_engine, session call_types = {1: "incoming", 2: "outgoing", 3: "missed"} @@ -33,6 +33,30 @@ def get_call_data(usernames: List) -> pd.DataFrame: return df_calls +def get_sms_data(usernames: List) -> pd.DataFrame: + """ + Read the data from the sms table and return it in a dataframe. + + Parameters + ---------- + usernames: List + A list of usernames to put into the WHERE condition. + + Returns + ------- + df_sms: pd.DataFrame + A dataframe of SMS data. + """ + query_sms = ( + session.query(SMS, Participant.username) + .filter(Participant.id == SMS.participant_id) + .filter(Participant.username.in_(usernames)) + ) + with db_engine.connect() as connection: + df_sms = pd.read_sql(query_sms.statement, connection) + return df_sms + + def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: """ Count contacts (callers, senders) and enumerate them by their frequency.