Add a function that counts calls and SMS messages by type and sums call durations by type.
parent
d218bb1d7c
commit
ab2dbf7a02
|
@ -47,6 +47,7 @@ class Participant(Base):
|
||||||
tester: bool
|
tester: bool
|
||||||
Is this a tester (or a true participant)?
|
Is this a tester (or a true participant)?
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__tablename__ = "participants"
|
__tablename__ = "participants"
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
username = Column(String(64), index=True, unique=True)
|
username = Column(String(64), index=True, unique=True)
|
||||||
|
@ -96,6 +97,7 @@ class AWAREsensor(object):
|
||||||
participant_id: int
|
participant_id: int
|
||||||
The foreign key relating (with the relationship) tables to the participants table.
|
The foreign key relating (with the relationship) tables to the participants table.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
id = Column(BigInteger, primary_key=True, nullable=False)
|
id = Column(BigInteger, primary_key=True, nullable=False)
|
||||||
_id = Column(BigInteger, nullable=False)
|
_id = Column(BigInteger, nullable=False)
|
||||||
timestamp = Column(BigInteger, nullable=False)
|
timestamp = Column(BigInteger, nullable=False)
|
||||||
|
@ -205,6 +207,7 @@ class Call(Base, AWAREsensor):
|
||||||
trace: str(40)
|
trace: str(40)
|
||||||
A hash value SHA-1 of the phone number (source or target) of the call
|
A hash value SHA-1 of the phone number (source or target) of the call
|
||||||
"""
|
"""
|
||||||
|
|
||||||
call_type = Column(SmallInteger, nullable=False)
|
call_type = Column(SmallInteger, nullable=False)
|
||||||
call_duration = Column(Integer, nullable=False)
|
call_duration = Column(Integer, nullable=False)
|
||||||
trace = Column(String(length=40), nullable=True)
|
trace = Column(String(length=40), nullable=True)
|
||||||
|
@ -345,6 +348,7 @@ class SMS(Base, AWAREsensor):
|
||||||
trace: str(40)
|
trace: str(40)
|
||||||
A hash value SHA-1 of the phone number (source or target) of the call
|
A hash value SHA-1 of the phone number (source or target) of the call
|
||||||
"""
|
"""
|
||||||
|
|
||||||
message_type = Column(SmallInteger, nullable=False)
|
message_type = Column(SmallInteger, nullable=False)
|
||||||
trace = Column(String(length=40), nullable=False)
|
trace = Column(String(length=40), nullable=False)
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,9 @@ import pandas as pd
|
||||||
from config.models import Call, Participant
|
from config.models import Call, Participant
|
||||||
from setup import db_engine, session
|
from setup import db_engine, session
|
||||||
|
|
||||||
|
# Labels for the integer codes found in the call_type column; count_comms uses them to name its output columns.
call_types = {1: "incoming", 2: "outgoing", 3: "missed"}
|
||||||
|
# Labels for the integer codes found in the message_type column; count_comms uses them to name its output columns.
sms_types = {1: "received", 2: "sent"}
|
||||||
|
|
||||||
|
|
||||||
def get_call_data(usernames: List) -> pd.DataFrame:
|
def get_call_data(usernames: List) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
|
@ -58,3 +61,33 @@ def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
comm_df["contact_id"] = comm_df["contact_id"].cat.rename_categories(contact_code)
|
comm_df["contact_id"] = comm_df["contact_id"].cat.rename_categories(contact_code)
|
||||||
# Recode the contacts into integers from 0 to n_contacts, so that the first one is contacted the most often.
|
# Recode the contacts into integers from 0 to n_contacts, so that the first one is contacted the most often.
|
||||||
return comm_df
|
return comm_df
|
||||||
|
|
||||||
|
|
||||||
|
def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
    """
    Count communication events per participant and per type.

    For call data, the number of calls and the summed call duration are calculated for each call type.
    For SMS data, only the number of messages of each type is counted.
    The kind of data is detected from the columns: call_type takes precedence over message_type.

    Parameters
    ----------
    comm_df: pd.DataFrame
        Call or SMS data. It must contain participant_id and either call_type
        (together with call_duration) or message_type.

    Returns
    -------
    comm_features: pd.DataFrame
        One row per participant_id. Counts are in columns named no_<type> and, for calls only,
        summed durations are in columns named duration_<type>, where <type> is the label from
        call_types or sms_types (codes without a label keep their original value).
        A participant/type combination that does not occur in the data is left as NaN.

    Raises
    ------
    KeyError
        If comm_df contains neither a call_type nor a message_type column.
    """
    if "call_type" in comm_df:
        call_keys = ["participant_id", "call_type"]
        comm_counts = (
            comm_df.value_counts(subset=call_keys)
            .unstack()
            .rename(columns=call_types)
            .add_prefix("no_")
        )
        # Select call_duration BEFORE aggregating, so only that column is summed.
        # Summing every column first is wasteful and breaks (or silently concatenates strings)
        # when the dataframe carries non-numeric columns such as hashes or datetimes.
        comm_duration = (
            comm_df.groupby(call_keys)["call_duration"]
            .sum()
            .unstack()
            .rename(columns=call_types)
            .add_prefix("duration_")
        )
        # Both frames are indexed by participant_id, so a plain index join lines them up.
        return comm_counts.join(comm_duration)

    if "message_type" in comm_df:
        return (
            comm_df.value_counts(subset=["participant_id", "message_type"])
            .unstack()
            .rename(columns=sms_types)
            .add_prefix("no_")
        )

    raise KeyError("The dataframe contains neither call_type or message_type")
|
||||||
|
|
Loading…
Reference in New Issue