Add more call features.

The total duration of calls and maximum duration by type.
The number of all calls and ratio of incoming and outgoing calls.
communication
junos 2021-05-07 15:18:56 +02:00
parent 0ea9d76a6f
commit 6e9c13d5d8
4 changed files with 73 additions and 25 deletions

View File

@ -14,20 +14,25 @@
# %% # %%
import sqlalchemy import sqlalchemy
print(sqlalchemy.__version__) print(sqlalchemy.__version__)
# %% # %%
from sqlalchemy import create_engine from sqlalchemy import create_engine
engine = create_engine('sqlite:///:memory:', echo=True)
engine = create_engine("sqlite:///:memory:", echo=True)
# %% # %%
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base() Base = declarative_base()
# %% # %%
from sqlalchemy import Column, Integer, String from sqlalchemy import Column, Integer, String
class User(Base): class User(Base):
__tablename__ = 'users' __tablename__ = "users"
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
name = Column(String) name = Column(String)
@ -36,7 +41,10 @@ class User(Base):
def __repr__(self): def __repr__(self):
return "<User(name='%s', fullname='%s', nickname='%s')>" % ( return "<User(name='%s', fullname='%s', nickname='%s')>" % (
self.name, self.fullname, self.nickname) self.name,
self.fullname,
self.nickname,
)
# %% # %%
@ -46,13 +54,14 @@ print(User.__table__)
Base.metadata.create_all(engine) Base.metadata.create_all(engine)
# %% # %%
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname') ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
print(ed_user.name) print(ed_user.name)
print(ed_user.nickname) print(ed_user.nickname)
print(str(ed_user.id)) print(str(ed_user.id))
# %% # %%
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine) Session = sessionmaker(bind=engine)
session = Session() session = Session()
@ -60,24 +69,27 @@ session = Session()
# # Adding and Updating Objects # # Adding and Updating Objects
# %% # %%
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname') ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
session.add(ed_user) session.add(ed_user)
# %% # %%
our_user = session.query(User).filter_by(name='ed').first() our_user = session.query(User).filter_by(name="ed").first()
print(our_user) print(our_user)
# %% # %%
print(ed_user is our_user) print(ed_user is our_user)
# %% # %%
session.add_all([ session.add_all(
User(name='wendy', fullname='Wendy Williams', nickname='windy'), [
User(name='mary', fullname='Mary Contrary', nickname='mary'), User(name="wendy", fullname="Wendy Williams", nickname="windy"),
User(name='fred', fullname='Fred Flintstone', nickname='freddy')]) User(name="mary", fullname="Mary Contrary", nickname="mary"),
User(name="fred", fullname="Fred Flintstone", nickname="freddy"),
]
)
# %% # %%
ed_user.nickname = 'eddie' ed_user.nickname = "eddie"
# %% # %%
print(session.dirty) print(session.dirty)
@ -95,14 +107,14 @@ print(ed_user.id)
# # Rolling back # # Rolling back
# %% # %%
ed_user.name = 'Edwardo' ed_user.name = "Edwardo"
# %% # %%
fake_user = User(name='fakeuser', fullname='Invalid', nickname='12345') fake_user = User(name="fakeuser", fullname="Invalid", nickname="12345")
session.add(fake_user) session.add(fake_user)
# %% # %%
session.query(User).filter(User.name.in_(['Edwardo', 'fakeuser'])).all() session.query(User).filter(User.name.in_(["Edwardo", "fakeuser"])).all()
# %% # %%
session.rollback() session.rollback()
@ -110,4 +122,4 @@ print(ed_user.name)
print(fake_user in session) print(fake_user in session)
# %% # %%
session.query(User).filter(User.name.in_(['ed', 'fakeuser'])).all() session.query(User).filter(User.name.in_(["ed", "fakeuser"])).all()

View File

@ -15,7 +15,7 @@ def get_call_data(usernames: Collection) -> pd.DataFrame:
Parameters Parameters
---------- ----------
usernames: List usernames: Collection
A list of usernames to put into the WHERE condition. A list of usernames to put into the WHERE condition.
Returns Returns
@ -39,7 +39,7 @@ def get_sms_data(usernames: Collection) -> pd.DataFrame:
Parameters Parameters
---------- ----------
usernames: List usernames: Collection
A list of usernames to put into the WHERE condition. A list of usernames to put into the WHERE condition.
Returns Returns
@ -111,9 +111,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
comm_features: pd.DataFrame comm_features: pd.DataFrame
A list of communication features for every participant. A list of communication features for every participant.
These are: These are:
* the number of messages by type (received, sent), * the number of calls by type (incoming, outgoing, missed) and in total,
* the number of calls by type (incoming, outgoing, missed), and * the ratio of incoming and outgoing calls to the total number of calls,
* the duration of calls by type. * the total and maximum duration of calls by type, and
* the number of messages by type (received, sent).
""" """
if "call_type" in comm_df: if "call_type" in comm_df:
comm_counts = ( comm_counts = (
@ -122,16 +123,35 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
.rename(columns=call_types) .rename(columns=call_types)
.add_prefix("no_") .add_prefix("no_")
) )
comm_duration = ( # Count calls by type.
comm_counts["no_all"] = comm_counts.sum(axis=1)
# Add a total count of calls.
comm_counts = comm_counts.assign(
no_incoming_ratio=lambda x: x.no_incoming / x.no_all,
no_outgoing_ratio=lambda x: x.no_outgoing / x.no_all,
)
# Ratio of incoming and outgoing calls to all calls.
comm_duration_total = (
comm_df.groupby(["participant_id", "call_type"]) comm_df.groupby(["participant_id", "call_type"])
.sum()["call_duration"] .sum()["call_duration"]
.unstack() .unstack()
.rename(columns=call_types) .rename(columns=call_types)
.add_prefix("duration_") .add_prefix("duration_total_")
) )
comm_features = comm_counts.join(comm_duration) # Total call duration by type.
comm_duration_max = (
comm_df.groupby(["participant_id", "call_type"])
.max()["call_duration"]
.unstack()
.rename(columns=call_types)
.add_prefix("duration_max_")
)
# Max call duration by type
comm_features = comm_counts.join(comm_duration_total)
comm_features = comm_features.join(comm_duration_max)
try: try:
comm_features.drop(columns="duration_" + call_types[3], inplace=True) comm_features.drop(columns="duration_total_" + call_types[3], inplace=True)
comm_features.drop(columns="duration_max_" + call_types[3], inplace=True)
# The missed calls are always of 0 duration. # The missed calls are always of 0 duration.
except KeyError: except KeyError:
pass pass
@ -145,6 +165,21 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
.add_prefix("no_") .add_prefix("no_")
) )
comm_features = comm_counts comm_features = comm_counts
# TODO Add ratio of outgoing and incoming texts.
else: else:
raise KeyError("The dataframe contains neither call_type or message_type") raise KeyError("The dataframe contains neither call_type or message_type")
return comm_features return comm_features
def contact_features():
    """Placeholder for per-contact communication features (not implemented yet)."""
    # TODO Implement a method that takes a DF with enumerated contacts as
    #  an argument and calculates:
    #  * Duration of calls per caller (for the most common callers)
    #  * Work vs. non-work contacts, determined by a work-hours heuristic
    #  * Number of people contacted
    #  And similarly for SMS.
    return None
def calls_sms_features():
    """Placeholder for features combining call and SMS data (not implemented yet)."""
    # TODO Relate the calls and sms data, such as comparing the number of
    #  (missed) calls and messages.
    return None

View File

@ -14,7 +14,7 @@ def get_screen_data(usernames: Collection) -> pd.DataFrame:
Parameters Parameters
---------- ----------
usernames: List usernames: Collection
A list of usernames to put into the WHERE condition. A list of usernames to put into the WHERE condition.
Returns Returns

View File

@ -71,4 +71,5 @@ class CallsFeatures(unittest.TestCase):
def test_count_comms(self): def test_count_comms(self):
self.features = count_comms(self.calls) self.features = count_comms(self.calls)
print(self.features)
self.assertIsInstance(self.features, pd.DataFrame) self.assertIsInstance(self.features, pd.DataFrame)