Add more call features.
The total duration of calls and maximum duration by type. The number of all calls and ratio of incoming and outgoing calls.
communication
parent
0ea9d76a6f
commit
6e9c13d5d8
|
@ -14,20 +14,25 @@
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
import sqlalchemy
|
import sqlalchemy
|
||||||
|
|
||||||
print(sqlalchemy.__version__)
|
print(sqlalchemy.__version__)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
engine = create_engine('sqlite:///:memory:', echo=True)
|
|
||||||
|
engine = create_engine("sqlite:///:memory:", echo=True)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
Base = declarative_base()
|
Base = declarative_base()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
from sqlalchemy import Column, Integer, String
|
from sqlalchemy import Column, Integer, String
|
||||||
|
|
||||||
|
|
||||||
class User(Base):
|
class User(Base):
|
||||||
__tablename__ = 'users'
|
__tablename__ = "users"
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
name = Column(String)
|
name = Column(String)
|
||||||
|
@ -36,7 +41,10 @@ class User(Base):
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<User(name='%s', fullname='%s', nickname='%s')>" % (
|
return "<User(name='%s', fullname='%s', nickname='%s')>" % (
|
||||||
self.name, self.fullname, self.nickname)
|
self.name,
|
||||||
|
self.fullname,
|
||||||
|
self.nickname,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@ -46,13 +54,14 @@ print(User.__table__)
|
||||||
Base.metadata.create_all(engine)
|
Base.metadata.create_all(engine)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
|
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
|
||||||
print(ed_user.name)
|
print(ed_user.name)
|
||||||
print(ed_user.nickname)
|
print(ed_user.nickname)
|
||||||
print(str(ed_user.id))
|
print(str(ed_user.id))
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
|
||||||
Session = sessionmaker(bind=engine)
|
Session = sessionmaker(bind=engine)
|
||||||
session = Session()
|
session = Session()
|
||||||
|
|
||||||
|
@ -60,24 +69,27 @@ session = Session()
|
||||||
# # Adding and Updating Objects
|
# # Adding and Updating Objects
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
ed_user = User(name='ed', fullname='Ed Jones', nickname='edsnickname')
|
ed_user = User(name="ed", fullname="Ed Jones", nickname="edsnickname")
|
||||||
session.add(ed_user)
|
session.add(ed_user)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
our_user = session.query(User).filter_by(name='ed').first()
|
our_user = session.query(User).filter_by(name="ed").first()
|
||||||
print(our_user)
|
print(our_user)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
print(ed_user is our_user)
|
print(ed_user is our_user)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
session.add_all([
|
session.add_all(
|
||||||
User(name='wendy', fullname='Wendy Williams', nickname='windy'),
|
[
|
||||||
User(name='mary', fullname='Mary Contrary', nickname='mary'),
|
User(name="wendy", fullname="Wendy Williams", nickname="windy"),
|
||||||
User(name='fred', fullname='Fred Flintstone', nickname='freddy')])
|
User(name="mary", fullname="Mary Contrary", nickname="mary"),
|
||||||
|
User(name="fred", fullname="Fred Flintstone", nickname="freddy"),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
ed_user.nickname = 'eddie'
|
ed_user.nickname = "eddie"
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
print(session.dirty)
|
print(session.dirty)
|
||||||
|
@ -95,14 +107,14 @@ print(ed_user.id)
|
||||||
# # Rolling back
|
# # Rolling back
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
ed_user.name = 'Edwardo'
|
ed_user.name = "Edwardo"
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
fake_user = User(name='fakeuser', fullname='Invalid', nickname='12345')
|
fake_user = User(name="fakeuser", fullname="Invalid", nickname="12345")
|
||||||
session.add(fake_user)
|
session.add(fake_user)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
session.query(User).filter(User.name.in_(['Edwardo', 'fakeuser'])).all()
|
session.query(User).filter(User.name.in_(["Edwardo", "fakeuser"])).all()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
session.rollback()
|
session.rollback()
|
||||||
|
@ -110,4 +122,4 @@ print(ed_user.name)
|
||||||
print(fake_user in session)
|
print(fake_user in session)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
session.query(User).filter(User.name.in_(['ed', 'fakeuser'])).all()
|
session.query(User).filter(User.name.in_(["ed", "fakeuser"])).all()
|
||||||
|
|
|
@ -15,7 +15,7 @@ def get_call_data(usernames: Collection) -> pd.DataFrame:
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
usernames: List
|
usernames: Collection
|
||||||
A list of usernames to put into the WHERE condition.
|
A list of usernames to put into the WHERE condition.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
@ -39,7 +39,7 @@ def get_sms_data(usernames: Collection) -> pd.DataFrame:
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
usernames: List
|
usernames: Collection
|
||||||
A list of usernames to put into the WHERE condition.
|
A list of usernames to put into the WHERE condition.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
@ -111,9 +111,10 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
comm_features: pd.DataFrame
|
comm_features: pd.DataFrame
|
||||||
A list of communication features for every participant.
|
A list of communication features for every participant.
|
||||||
These are:
|
These are:
|
||||||
* the number of messages by type (received, sent),
|
* the number of calls by type (incoming, outgoing, missed) and in total,
|
||||||
* the number of calls by type (incoming, outgoing, missed), and
|
* the ratio of incoming and outgoing calls to the total number of calls,
|
||||||
* the duration of calls by type.
|
* the total and maximum duration of calls by type, and
|
||||||
|
* the number of messages by type (received, sent).
|
||||||
"""
|
"""
|
||||||
if "call_type" in comm_df:
|
if "call_type" in comm_df:
|
||||||
comm_counts = (
|
comm_counts = (
|
||||||
|
@ -122,16 +123,35 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
.rename(columns=call_types)
|
.rename(columns=call_types)
|
||||||
.add_prefix("no_")
|
.add_prefix("no_")
|
||||||
)
|
)
|
||||||
comm_duration = (
|
# Count calls by type.
|
||||||
|
comm_counts["no_all"] = comm_counts.sum(axis=1)
|
||||||
|
# Add a total count of calls.
|
||||||
|
comm_counts = comm_counts.assign(
|
||||||
|
no_incoming_ratio=lambda x: x.no_incoming / x.no_all,
|
||||||
|
no_outgoing_ratio=lambda x: x.no_outgoing / x.no_all,
|
||||||
|
)
|
||||||
|
# Ratio of incoming and outgoing calls to all calls.
|
||||||
|
comm_duration_total = (
|
||||||
comm_df.groupby(["participant_id", "call_type"])
|
comm_df.groupby(["participant_id", "call_type"])
|
||||||
.sum()["call_duration"]
|
.sum()["call_duration"]
|
||||||
.unstack()
|
.unstack()
|
||||||
.rename(columns=call_types)
|
.rename(columns=call_types)
|
||||||
.add_prefix("duration_")
|
.add_prefix("duration_total_")
|
||||||
)
|
)
|
||||||
comm_features = comm_counts.join(comm_duration)
|
# Total call duration by type.
|
||||||
|
comm_duration_max = (
|
||||||
|
comm_df.groupby(["participant_id", "call_type"])
|
||||||
|
.max()["call_duration"]
|
||||||
|
.unstack()
|
||||||
|
.rename(columns=call_types)
|
||||||
|
.add_prefix("duration_max_")
|
||||||
|
)
|
||||||
|
# Max call duration by type
|
||||||
|
comm_features = comm_counts.join(comm_duration_total)
|
||||||
|
comm_features = comm_features.join(comm_duration_max)
|
||||||
try:
|
try:
|
||||||
comm_features.drop(columns="duration_" + call_types[3], inplace=True)
|
comm_features.drop(columns="duration_total_" + call_types[3], inplace=True)
|
||||||
|
comm_features.drop(columns="duration_max_" + call_types[3], inplace=True)
|
||||||
# The missed calls are always of 0 duration.
|
# The missed calls are always of 0 duration.
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
|
@ -145,6 +165,21 @@ def count_comms(comm_df: pd.DataFrame) -> pd.DataFrame:
|
||||||
.add_prefix("no_")
|
.add_prefix("no_")
|
||||||
)
|
)
|
||||||
comm_features = comm_counts
|
comm_features = comm_counts
|
||||||
|
# TODO Add ratio of outgoing and incoming texts.
|
||||||
else:
|
else:
|
||||||
raise KeyError("The dataframe contains neither call_type or message_type")
|
raise KeyError("The dataframe contains neither call_type or message_type")
|
||||||
return comm_features
|
return comm_features
|
||||||
|
|
||||||
|
|
||||||
|
def contact_features():
|
||||||
|
# TODO Implement a method that takes a DF with enumerated contacts as argument and calculates:
|
||||||
|
# * Duration of calls per caller (for most common callers)
|
||||||
|
# * Determine work vs non-work contacts by work hours heuristics
|
||||||
|
# * Number of people contacted
|
||||||
|
# And similarly for SMS.
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def calls_sms_features():
|
||||||
|
# TODO Relate the calls and sms data, such as comparing the number of (missed) calls and messages.
|
||||||
|
pass
|
||||||
|
|
|
@ -14,7 +14,7 @@ def get_screen_data(usernames: Collection) -> pd.DataFrame:
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
usernames: List
|
usernames: Collection
|
||||||
A list of usernames to put into the WHERE condition.
|
A list of usernames to put into the WHERE condition.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
|
|
@ -71,4 +71,5 @@ class CallsFeatures(unittest.TestCase):
|
||||||
|
|
||||||
def test_count_comms(self):
|
def test_count_comms(self):
|
||||||
self.features = count_comms(self.calls)
|
self.features = count_comms(self.calls)
|
||||||
|
print(self.features)
|
||||||
self.assertIsInstance(self.features, pd.DataFrame)
|
self.assertIsInstance(self.features, pd.DataFrame)
|
||||||
|
|
Loading…
Reference in New Issue