36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
from typing import List
|
|
|
|
import pandas as pd
|
|
|
|
from config.models import Call, Participant
|
|
from setup import db_engine, session
|
|
|
|
|
|
def get_call_data(usernames: List) -> pd.DataFrame:
    """Load the call records of the given participants into a DataFrame.

    Builds a query selecting ``Call`` together with ``Participant.username``,
    matched on ``Participant.id == Call.participant_id`` and restricted to
    participants whose username is in ``usernames``, then reads the result
    through a connection obtained from ``db_engine``.

    Args:
        usernames: Usernames to match against ``Participant.username``.

    Returns:
        The query result as produced by ``pd.read_sql``.
    """
    # Both criteria are passed to a single filter(); they are AND-ed together.
    calls_for_users = session.query(Call, Participant.username).filter(
        Participant.id == Call.participant_id,
        Participant.username.in_(usernames),
    )
    # The connection is only needed while the statement is being read.
    with db_engine.connect() as connection:
        return pd.read_sql(calls_for_users.statement, connection)
|
|
|
|
|
|
def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
    """Count contacts (callers, senders) and enumerate them by their frequency.

    Every distinct value of the ``trace`` column is assigned an integer id:
    0 for the most frequent trace, 1 for the next one, and so on. The ids are
    stored in a new categorical column named ``contact_id``.

    Note:
        ``comm_df`` is modified in place (the ``contact_id`` column is added
        or overwritten) and the very same object is returned.

    Args:
        comm_df: Communication records; must contain a ``trace`` column.

    Returns:
        ``comm_df`` itself, with the ``contact_id`` column set.

    Raises:
        KeyError: If ``comm_df`` has no ``trace`` column.
    """
    # Distinct traces (contacts), ordered from most to least frequent.
    traces_by_frequency = (
        comm_df["trace"].value_counts(sort=True, ascending=False).index
    )
    # Translate each trace into its rank in that ordering.
    contact_code = {
        trace: rank for rank, trace in enumerate(traces_by_frequency)
    }
    # Convert to categorical data, then rename the categories so that the
    # contacts are coded as integers from 0 upwards, 0 being the contact
    # that occurs most often.
    comm_df["contact_id"] = (
        comm_df["trace"].astype("category").cat.rename_categories(contact_code)
    )
    return comm_df
|