Add a function to get SMS data and illustrate it in a Jupyter notebook.

communication
junos 2021-04-06 16:50:40 +02:00
parent b57c8a4eac
commit 9b495e63a3
2 changed files with 254 additions and 1 deletions

View File

@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os, sys\n",
"nb_dir = os.path.split(os.getcwd())[0]\n",
"if nb_dir not in sys.path:\n",
" sys.path.append(nb_dir)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from features.communication import *"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id _id timestamp device_id call_type \\\n",
"0 1649 2 1603359870948 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n",
"1 1648 1 1603359849077 645ca1c1-b798-410c-a0b2-fd24d0f0186d 2 \n",
"2 1647 1 1603358854783 049df3f8-8541-4cf5-af2b-83f6b3f0cf4b 2 \n",
"3 1267 5 1599242289282 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n",
"4 1266 4 1599242131166 d2a71262-b2cf-484b-b422-ec2a84eebd3d 2 \n",
"5 794 3 1588053846893 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n",
"6 744 2 1587137920351 d2a71262-b2cf-484b-b422-ec2a84eebd3d 3 \n",
"7 616 1 1585919254218 d2a71262-b2cf-484b-b422-ec2a84eebd3d 1 \n",
"8 556 1 1585043148221 d5fb52e1-7df8-44b5-a805-8d04ca008061 1 \n",
"\n",
" call_duration trace participant_id \\\n",
"0 0 040519011 21 \n",
"1 0 +38640519011 21 \n",
"2 0 72441dc0eb9550fcdc5a61cce9dc8bd302494680 21 \n",
"3 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n",
"4 0 4f345b8682824a491e57efbd4afd61e6212a9c05 21 \n",
"5 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
"6 0 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
"7 29 1d705b16b9983c32d2ef1af7f150944696a23fb5 21 \n",
"8 17 501cef50691bcc4f0ddc4bb5d6daa07154189d47 21 \n",
"\n",
" username \n",
"0 nokia_0000003 \n",
"1 nokia_0000003 \n",
"2 nokia_0000003 \n",
"3 nokia_0000003 \n",
"4 nokia_0000003 \n",
"5 nokia_0000003 \n",
"6 nokia_0000003 \n",
"7 nokia_0000003 \n",
"8 nokia_0000003 \n"
]
}
],
"source": [
"df_calls = get_call_data([\"nokia_0000003\"])\n",
"print(df_calls)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>call_type</th>\n",
" <th>no_incoming</th>\n",
" <th>no_outgoing</th>\n",
" <th>no_missed</th>\n",
" <th>duration_incoming</th>\n",
" <th>duration_outgoing</th>\n",
" </tr>\n",
" <tr>\n",
" <th>participant_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>46</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"call_type no_incoming no_outgoing no_missed duration_incoming \\\n",
"participant_id \n",
"21 2 5 2 46 \n",
"\n",
"call_type duration_outgoing \n",
"participant_id \n",
"21 0 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count_comms(df_calls)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>message_type</th>\n",
" <th>no_received</th>\n",
" <th>no_sent</th>\n",
" </tr>\n",
" <tr>\n",
" <th>participant_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"message_type no_received no_sent\n",
"participant_id \n",
"21 16 2"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sms = get_sms_data([\"nokia_0000003\"])\n",
"count_comms(df_sms)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "straw2analysis",
"language": "python",
"name": "straw2analysis"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -2,7 +2,7 @@ from typing import List
import pandas as pd import pandas as pd
from config.models import Call, Participant from config.models import Call, Participant, SMS
from setup import db_engine, session from setup import db_engine, session
call_types = {1: "incoming", 2: "outgoing", 3: "missed"} call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
@ -33,6 +33,30 @@ def get_call_data(usernames: List) -> pd.DataFrame:
return df_calls return df_calls
def get_sms_data(usernames: List) -> pd.DataFrame:
    """
    Read the data from the sms table and return it in a dataframe.

    Parameters
    ----------
    usernames: List
        A list of usernames to put into the WHERE condition.

    Returns
    -------
    df_sms: pd.DataFrame
        A dataframe of SMS data, with the participant's username
        selected alongside the SMS columns.
    """
    # Pair each SMS row with its participant and keep only the rows that
    # belong to the requested usernames.
    query_sms = (
        session.query(SMS, Participant.username)
        .filter(Participant.id == SMS.participant_id)
        .filter(Participant.username.in_(usernames))
    )
    # The ORM query is used only to build the SELECT statement; pandas
    # executes it on a connection that is closed when the block exits.
    with db_engine.connect() as connection:
        df_sms = pd.read_sql(query_sms.statement, connection)
    return df_sms
def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame: def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
""" """
Count contacts (callers, senders) and enumerate them by their frequency. Count contacts (callers, senders) and enumerate them by their frequency.