Add more call features.

The total duration of calls and maximum duration by type.
The number of all calls and ratio of incoming and outgoing calls.
communication
junos 2021-05-07 15:18:56 +02:00
parent 0ea9d76a6f
commit 6e9c13d5d8
4 changed files with 73 additions and 25 deletions

View File

@ -14,20 +14,25 @@
# %%
import sqlalchemy
print(sqlalchemy.__version__)
# %%
from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:', echo=True)
engine = create_engine("sqlite:///:memory:", echo=True)
# %%
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
# %%
from sqlalchemy import Column, Integer, String
class User(Base):
__tablename__ = 'users'
__tablename__ = "users"
id = Column(Integer, primary_key=True)
name = Column(String)
@ -36,7 +41,10 @@ class User(Base):
def __repr__(self):
return "<User(name='%s', fullname='%s', nickname='%s')>" % (
self.name, self.fullname, self.nickname)
self.name,
self.fullname,
self.nickname,
)
# %%
@ -46,13 +54,14 @@ print(User.__table__)
Base.metadata.create_all(engine)
# %%
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
print(ed_user.name)
print(ed_user.nickname)
print(str(ed_user.id))
# %%
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()
@ -60,24 +69,27 @@ session = Session()
# # Adding and Updating Objects
# %%
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
session.add(ed_user)
# %%
our_user = session.query(User).filter_by(name='ed').first()
our_user = session.query(User).filter_by(name="ed").first()
print(our_user)
# %%
print(ed_user is our_user)
# %%
session.add_all([
User(name='wendy', fullname='Wendy Williams', nickname='windy'),
User(name='mary', fullname='Mary Contrary', nickname='mary'),
User(name='fred', fullname='Fred Flintstone', nickname='freddy')])
session.add_all(
[
User(name="wendy", fullname="Wendy Williams", nickname="windy"),
User(name="mary", fullname="Mary Contrary", nickname="mary"),
User(name="fred", fullname="Fred Flintstone", nickname="freddy"),
]
)
# %%
ed_user.nickname = 'eddie'
ed_user.nickname = "eddie"
# %%
print(session.dirty)
@ -95,14 +107,14 @@ print(ed_user.id)
# # Rolling back
# %%
ed_user.name = 'Edwardo'
ed_user.name = "Edwardo"
# %%
fake_user = User(name='fakeuser', fullname='Invalid', nickname='12345')
fake_user = User(name="fakeuser", fullname="Invalid", nickname="12345")
session.add(fake_user)
# %%
session.query(User).filter(User.name.in_(['Edwardo', 'fakeuser'])).all()
session.query(User).filter(User.name.in_(["Edwardo", "fakeuser"])).all()
# %%
session.rollback()
@ -110,4 +122,4 @@ print(ed_user.name)
print(fake_user in session)
# %%
session.query(User).filter(User.name.in_(['ed', 'fakeuser'])).all()
session.query(User).filter(User.name.in_(["ed", "fakeuser"])).all()

View File

@ -15,7 +15,7 @@ def get_call_data(usernames: Collection) -> pd.DataFrame:
Parameters
----------
usernames: List
usernames: Collection
A list of usernames to put into the WHERE condition.
Returns
@ -39,7 +39,7 @@ def get_sms_data(usernames: Collection) -> pd.DataFrame:
Parameters
----------
usernames: List
usernames: Collection
A list of usernames to put into the WHERE condition.
Returns
@ -111,9 +111,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
comm_features: pd.DataFrame
A list of communication features for every participant.
These are:
* the number of messages by type (received, sent),
* the number of calls by type (incoming, outgoing, missed), and
* the duration of calls by type.
* the number of calls by type (incoming, outgoing, missed) and in total,
* the ratio of incoming and outgoing calls to the total number of calls,
* the total and maximum duration of calls by type, and
* the number of messages by type (received, sent).
"""
if "call_type" in comm_df:
comm_counts = (
@ -122,16 +123,35 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
.rename(columns=call_types)
.add_prefix("no_")
)
comm_duration = (
# Count calls by type.
comm_counts["no_all"] = comm_counts.sum(axis=1)
# Add a total count of calls.
comm_counts = comm_counts.assign(
no_incoming_ratio=lambda x: x.no_incoming / x.no_all,
no_outgoing_ratio=lambda x: x.no_outgoing / x.no_all,
)
# Ratio of incoming and outgoing calls to all calls.
comm_duration_total = (
comm_df.groupby(["participant_id", "call_type"])
.sum()["call_duration"]
.unstack()
.rename(columns=call_types)
.add_prefix("duration_")
.add_prefix("duration_total_")
)
comm_features = comm_counts.join(comm_duration)
# Total call duration by type.
comm_duration_max = (
comm_df.groupby(["participant_id", "call_type"])
.max()["call_duration"]
.unstack()
.rename(columns=call_types)
.add_prefix("duration_max_")
)
# Max call duration by type
comm_features = comm_counts.join(comm_duration_total)
comm_features = comm_features.join(comm_duration_max)
try:
comm_features.drop(columns="duration_" + call_types[3], inplace=True)
comm_features.drop(columns="duration_total_" + call_types[3], inplace=True)
comm_features.drop(columns="duration_max_" + call_types[3], inplace=True)
# The missed calls are always of 0 duration.
except KeyError:
pass
@ -145,6 +165,21 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
.add_prefix("no_")
)
comm_features = comm_counts
# TODO Add ratio of outgoing and incoming texts.
else:
raise KeyError("The dataframe contains neither call_type or message_type")
return comm_features
def contact_features():
    """Placeholder for per-contact communication features (not implemented).

    The function currently takes no arguments, does no work and returns
    None.
    """
    # TODO Implement a method that takes a DF with enumerated contacts as
    # an argument and calculates:
    #   * the duration of calls per caller (for the most common callers),
    #   * work vs. non-work contacts, determined by a work-hours heuristic,
    #   * the number of people contacted,
    # and similarly for SMS.
    return None
def calls_sms_features():
    """Placeholder for features relating calls to SMS (not implemented).

    The function currently takes no arguments, does no work and returns
    None.
    """
    # TODO Relate the calls and SMS data, e.g. by comparing the number of
    # (missed) calls with the number of messages.
    return None

View File

@ -14,7 +14,7 @@ def get_screen_data(usernames: Collection) -> pd.DataFrame:
Parameters
----------
usernames: List
usernames: Collection
A list of usernames to put into the WHERE condition.
Returns

View File

@ -71,4 +71,5 @@ class CallsFeatures(unittest.TestCase):
def test_count_comms(self):
    """Check that count_comms returns a pandas DataFrame for self.calls."""
    features = count_comms(self.calls)
    # Keep the result on the test instance, as the original did.
    self.features = features
    # Print the computed features so they show up in the test output.
    print(features)
    self.assertIsInstance(features, pd.DataFrame)