from typing import List

import pandas as pd

from config.models import Call, Participant
from setup import db_engine, session


def get_call_data(usernames: List) -> pd.DataFrame:
    """
    Read the calls of the given participants into a data frame.

    The query selects ``Call`` together with ``Participant.username``,
    matches calls to participants on ``Participant.id == Call.participant_id``
    and keeps only participants whose username is in ``usernames``.

    Parameters
    ----------
    usernames: List
        Usernames of the participants whose calls should be retrieved.

    Returns
    -------
    pd.DataFrame
        The result of running the query through ``pd.read_sql``.
    """
    calls_with_username = session.query(Call, Participant.username)
    calls_of_participants = calls_with_username.filter(
        Participant.id == Call.participant_id
    )
    calls_of_selected = calls_of_participants.filter(
        Participant.username.in_(usernames)
    )
    # The connection is only needed while the query result is being read.
    with db_engine.connect() as connection:
        return pd.read_sql(calls_of_selected.statement, connection)


def enumerate_contacts(comm_df: pd.DataFrame) -> pd.DataFrame:
    """
    Count contacts (callers, senders) and enumerate them by their frequency.

    A ``contact_id`` column is added to ``comm_df`` itself (the frame is
    modified in place) and the same frame is returned.

    Parameters
    ----------
    comm_df: pd.DataFrame
        Communication data with a ``trace`` column identifying the contact.

    Returns
    -------
    pd.DataFrame
        ``comm_df`` with an additional categorical ``contact_id`` column whose
        categories are integers: 0 marks the most frequent trace, 1 the second
        most frequent one, and so on. Traces with equal counts are numbered in
        the order in which ``value_counts`` returns them.
    """
    traces = comm_df["trace"]

    # Distinct traces (contacts), ordered from the most to the least frequent.
    traces_by_frequency = traces.value_counts(sort=True, ascending=False).index

    # A dictionary translating traces into integers, enumerated by frequency.
    rank_of_trace = {
        trace: rank for rank, trace in enumerate(traces_by_frequency)
    }

    # Treat the traces as categorical data rather than plain characters and
    # relabel each category with its frequency rank.
    comm_df["contact_id"] = traces.astype("category").cat.rename_categories(
        rank_of_trace
    )
    return comm_df