Add a function to count calls and their duration and SMSes by type.

communication
junos 2021-04-06 16:23:19 +02:00
parent d218bb1d7c
commit ab2dbf7a02
2 changed files with 37 additions and 0 deletions

View File

@ -47,6 +47,7 @@ class Participant(Base):
tester: bool
Is this a tester (or a true participant)?
"""
__tablename__ = "participants"
id = Column(Integer, primary_key=True)
username = Column(String(64), index=True, unique=True)
@ -96,6 +97,7 @@ class AWAREsensor(object):
participant_id: int
The foreign key relating (with the relationship) tables to the participants table.
"""
id = Column(BigInteger, primary_key=True, nullable=False)
_id = Column(BigInteger, nullable=False)
timestamp = Column(BigInteger, nullable=False)
@ -205,6 +207,7 @@ class Call(Base, AWAREsensor):
trace: str(40)
A SHA-1 hash of the phone number (source or target) of the call
"""
call_type = Column(SmallInteger, nullable=False)
call_duration = Column(Integer, nullable=False)
trace = Column(String(length=40), nullable=True)
@ -345,6 +348,7 @@ class SMS(Base, AWAREsensor):
trace: str(40)
A SHA-1 hash of the phone number (source or target) of the message
"""
message_type = Column(SmallInteger, nullable=False)
trace = Column(String(length=40), nullable=False)

View File

@ -5,6 +5,9 @@ import pandas as pd
from config.models import Call, Participant
from setup import db_engine, session
# Labels for the integer codes found in the call_type column;
# count_comms uses them to name its output columns.
call_types = {
    1: "incoming",
    2: "outgoing",
    3: "missed",
}
# Labels for the integer codes found in the message_type column;
# count_comms uses them to name its output columns.
sms_types = {
    1: "received",
    2: "sent",
}
def get_call_data(usernames: List) -> pd.DataFrame:
"""
@ -58,3 +61,33 @@ def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
comm_df["contact_id"] = comm_df["contact_id"].cat.rename_categories(contact_code)
# Recode the contacts into integers from 0 to n_contacts, so that the first one is contacted the most often.
return comm_df
def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
    """
    Count communication events per participant and type; for calls, also sum durations.

    Parameters
    ----------
    comm_df: pd.DataFrame
        Either call data (containing the columns participant_id, call_type and
        call_duration) or SMS data (containing participant_id and message_type).
        If both call_type and message_type are present, the data is treated as calls.

    Returns
    -------
    comm_features: pd.DataFrame
        A dataframe indexed by participant_id with one column per type.
        For calls, the columns are no_<type> (number of calls) and
        duration_<type> (summed call_duration), with <type> taken from call_types.
        For SMS, the columns are no_<type>, with <type> taken from sms_types.
        Type codes missing from the mapping keep their numeric code in the column name.
        Participant/type combinations that do not occur in the data are NaN.

    Raises
    ------
    KeyError
        If comm_df contains neither a call_type nor a message_type column.
    """
    if "call_type" in comm_df:
        group_columns = ["participant_id", "call_type"]
        comm_counts = (
            comm_df.value_counts(subset=group_columns)
            .unstack()
            .rename(columns=call_types)
            .add_prefix("no_")
        )
        # Select call_duration BEFORE aggregating: summing the whole frame first
        # wastes work on unrelated columns and fails on non-summable ones
        # (e.g. datetimes) in recent pandas versions.
        comm_duration = (
            comm_df.groupby(group_columns)["call_duration"]
            .sum()
            .unstack()
            .rename(columns=call_types)
            .add_prefix("duration_")
        )
        comm_features = comm_counts.join(comm_duration)
    elif "message_type" in comm_df:
        comm_counts = (
            comm_df.value_counts(subset=["participant_id", "message_type"])
            .unstack()
            .rename(columns=sms_types)
            .add_prefix("no_")
        )
        comm_features = comm_counts
    else:
        raise KeyError("The dataframe contains neither call_type or message_type")
    return comm_features