Add more call features.
The total duration of calls and maximum duration by type. The number of all calls and ratio of incoming and outgoing calls.
communication
parent
0ea9d76a6f
commit
6e9c13d5d8
|
@ -14,20 +14,25 @@
|
|||
|
||||
# %%
|
||||
import sqlalchemy
|
||||
|
||||
print(sqlalchemy.__version__)
|
||||
|
||||
# %%
|
||||
from sqlalchemy import create_engine
|
||||
engine = create_engine('sqlite:///:memory:', echo=True)
|
||||
|
||||
engine = create_engine("sqlite:///:memory:", echo=True)
|
||||
|
||||
# %%
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
# %%
|
||||
from sqlalchemy import Column, Integer, String
|
||||
|
||||
|
||||
class User(Base):
|
||||
__tablename__ = 'users'
|
||||
__tablename__ = "users"
|
||||
|
||||
id = Column(Integer, primary_key=True)
|
||||
name = Column(String)
|
||||
|
@ -36,7 +41,10 @@ class User(Base):
|
|||
|
||||
def __repr__(self):
|
||||
return "<User(name='%s', fullname='%s', nickname='%s')>" % (
|
||||
self.name, self.fullname, self.nickname)
|
||||
self.name,
|
||||
self.fullname,
|
||||
self.nickname,
|
||||
)
|
||||
|
||||
|
||||
# %%
|
||||
|
@ -46,13 +54,14 @@ print(User.__table__)
|
|||
Base.metadata.create_all(engine)
|
||||
|
||||
# %%
|
||||
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
|
||||
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
|
||||
print(ed_user.name)
|
||||
print(ed_user.nickname)
|
||||
print(str(ed_user.id))
|
||||
|
||||
# %%
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
Session = sessionmaker(bind=engine)
|
||||
session = Session()
|
||||
|
||||
|
@ -60,24 +69,27 @@ session = Session()
|
|||
# # Adding and Updating Objects
|
||||
|
||||
# %%
|
||||
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
|
||||
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
|
||||
session.add(ed_user)
|
||||
|
||||
# %%
|
||||
our_user = session.query(User).filter_by(name='ed').first()
|
||||
our_user = session.query(User).filter_by(name="ed").first()
|
||||
print(our_user)
|
||||
|
||||
# %%
|
||||
print(ed_user is our_user)
|
||||
|
||||
# %%
|
||||
session.add_all([
|
||||
User(name='wendy', fullname='Wendy Williams', nickname='windy'),
|
||||
User(name='mary', fullname='Mary Contrary', nickname='mary'),
|
||||
User(name='fred', fullname='Fred Flintstone', nickname='freddy')])
|
||||
session.add_all(
|
||||
[
|
||||
User(name="wendy", fullname="Wendy Williams", nickname="windy"),
|
||||
User(name="mary", fullname="Mary Contrary", nickname="mary"),
|
||||
User(name="fred", fullname="Fred Flintstone", nickname="freddy"),
|
||||
]
|
||||
)
|
||||
|
||||
# %%
|
||||
ed_user.nickname = 'eddie'
|
||||
ed_user.nickname = "eddie"
|
||||
|
||||
# %%
|
||||
print(session.dirty)
|
||||
|
@ -95,14 +107,14 @@ print(ed_user.id)
|
|||
# # Rolling back
|
||||
|
||||
# %%
|
||||
ed_user.name = 'Edwardo'
|
||||
ed_user.name = "Edwardo"
|
||||
|
||||
# %%
|
||||
fake_user = User(name='fakeuser', fullname='Invalid', nickname='12345')
|
||||
fake_user = User(name="fakeuser", fullname="Invalid", nickname="12345")
|
||||
session.add(fake_user)
|
||||
|
||||
# %%
|
||||
session.query(User).filter(User.name.in_(['Edwardo', 'fakeuser'])).all()
|
||||
session.query(User).filter(User.name.in_(["Edwardo", "fakeuser"])).all()
|
||||
|
||||
# %%
|
||||
session.rollback()
|
||||
|
@ -110,4 +122,4 @@ print(ed_user.name)
|
|||
print(fake_user in session)
|
||||
|
||||
# %%
|
||||
session.query(User).filter(User.name.in_(['ed', 'fakeuser'])).all()
|
||||
session.query(User).filter(User.name.in_(["ed", "fakeuser"])).all()
|
||||
|
|
|
@ -15,7 +15,7 @@ def get_call_data(usernames: Collection) -> pd.DataFrame:
|
|||
|
||||
Parameters
|
||||
----------
|
||||
usernames: List
|
||||
usernames: Collection
|
||||
A list of usernames to put into the WHERE condition.
|
||||
|
||||
Returns
|
||||
|
@ -39,7 +39,7 @@ def get_sms_data(usernames: Collection) -> pd.DataFrame:
|
|||
|
||||
Parameters
|
||||
----------
|
||||
usernames: List
|
||||
usernames: Collection
|
||||
A list of usernames to put into the WHERE condition.
|
||||
|
||||
Returns
|
||||
|
@ -111,9 +111,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
|||
comm_features: pd.DataFrame
|
||||
A list of communication features for every participant.
|
||||
These are:
|
||||
* the number of messages by type (received, sent),
|
||||
* the number of calls by type (incoming, outgoing missed), and
|
||||
* the duration of calls by type.
|
||||
* the number of calls by type (incoming, outgoing, missed) and in total,
|
||||
* the ratio of incoming and outgoing calls to the total number of calls,
|
||||
* the total and maximum duration of calls by type, and
|
||||
* the number of messages by type (received, sent).
|
||||
"""
|
||||
if "call_type" in comm_df:
|
||||
comm_counts = (
|
||||
|
@ -122,16 +123,35 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
|||
.rename(columns=call_types)
|
||||
.add_prefix("no_")
|
||||
)
|
||||
comm_duration = (
|
||||
# Count calls by type.
|
||||
comm_counts["no_all"] = comm_counts.sum(axis=1)
|
||||
# Add a total count of calls.
|
||||
comm_counts = comm_counts.assign(
|
||||
no_incoming_ratio=lambda x: x.no_incoming / x.no_all,
|
||||
no_outgoing_ratio=lambda x: x.no_outgoing / x.no_all,
|
||||
)
|
||||
# Ratio of incoming and outgoing calls to all calls.
|
||||
comm_duration_total = (
|
||||
comm_df.groupby(["participant_id", "call_type"])
|
||||
.sum()["call_duration"]
|
||||
.unstack()
|
||||
.rename(columns=call_types)
|
||||
.add_prefix("duration_")
|
||||
.add_prefix("duration_total_")
|
||||
)
|
||||
comm_features = comm_counts.join(comm_duration)
|
||||
# Total call duration by type.
|
||||
comm_duration_max = (
|
||||
comm_df.groupby(["participant_id", "call_type"])
|
||||
.max()["call_duration"]
|
||||
.unstack()
|
||||
.rename(columns=call_types)
|
||||
.add_prefix("duration_max_")
|
||||
)
|
||||
# Max call duration by type
|
||||
comm_features = comm_counts.join(comm_duration_total)
|
||||
comm_features = comm_features.join(comm_duration_max)
|
||||
try:
|
||||
comm_features.drop(columns="duration_" + call_types[3], inplace=True)
|
||||
comm_features.drop(columns="duration_total_" + call_types[3], inplace=True)
|
||||
comm_features.drop(columns="duration_max_" + call_types[3], inplace=True)
|
||||
# The missed calls are always of 0 duration.
|
||||
except KeyError:
|
||||
pass
|
||||
|
@ -145,6 +165,21 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
|||
.add_prefix("no_")
|
||||
)
|
||||
comm_features = comm_counts
|
||||
# TODO Add ratio of outgoing and incoming texts.
|
||||
else:
|
||||
raise KeyError("The dataframe contains neither call_type or message_type")
|
||||
return comm_features
|
||||
|
||||
|
||||
def contact_features():
|
||||
# TODO Implement a method that takes a DF with enumerated contacts as argument and calculates:
|
||||
# * Duration of calls per caller (for most common callers)
|
||||
# * Determine work vs non-work contacts by work hours heuristics
|
||||
# * Number of people contacted
|
||||
# And similarly for SMS.
|
||||
pass
|
||||
|
||||
|
||||
def calls_sms_features():
|
||||
# TODO Relate the calls and sms data, such as comparing the number of (missed) calls and messages.
|
||||
pass
|
||||
|
|
|
@ -14,7 +14,7 @@ def get_screen_data(usernames: Collection) -> pd.DataFrame:
|
|||
|
||||
Parameters
|
||||
----------
|
||||
usernames: List
|
||||
usernames: Collection
|
||||
A list of usernames to put into the WHERE condition.
|
||||
|
||||
Returns
|
||||
|
|
|
@ -71,4 +71,5 @@ class CallsFeatures(unittest.TestCase):
|
|||
|
||||
def test_count_comms(self):
|
||||
self.features = count_comms(self.calls)
|
||||
print(self.features)
|
||||
self.assertIsInstance(self.features, pd.DataFrame)
|
||||
|
|
Loading…
Reference in New Issue