# ---
# jupyter:
#   jupytext:
#     formats: ipynb,py:percent
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.11.2
#   kernelspec:
#     display_name: straw2analysis
#     language: python
#     name: straw2analysis
# ---

# %%
import os
import sys

import matplotlib.pyplot as plt

# `pd` is used by the reshaping cells below; import it explicitly rather than
# relying on the star import from features.communication to leak it.
import pandas as pd

# %%
import seaborn as sns

# Put the parent of the current working directory on sys.path so that the
# `features` and `participants` packages imported below can be resolved.
# NOTE(review): presumably the notebook is run from a subfolder of the
# repository root - confirm.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# %%
# Provides get_call_data, get_sms_data, count_comms and enumerate_contacts
# used throughout this notebook. Must come after the sys.path tweak above.
from features.communication import *

# %% [markdown]
# # Example of communication data and feature calculation

# %%
# Load the call data for a single example participant and inspect it.
df_calls = get_call_data(["nokia_0000003"])
print(df_calls)

# %%
count_comms(df_calls)

# %%
# Same feature calculation, this time on the SMS data of that participant.
df_sms = get_sms_data(["nokia_0000003"])
count_comms(df_sms)

# %% [markdown]
# # Call data

# %%
import participants.query_db

# %%
# NOTE(review): the variable name suggests these are inactive participants,
# but get_usernames() is called without arguments - confirm what it returns.
participants_inactive_usernames = participants.query_db.get_usernames()
df_calls_inactive = get_call_data(participants_inactive_usernames)

# %%
df_calls_features = count_comms(df_calls_inactive)
df_calls_features.head()

# %%
df_calls_features.describe()

# %%
# Reshape wide -> long: every column named "no_<suffix>" becomes rows of a
# single "no" column, with the non-digit suffix stored in "call_type".
# The suffix pattern is a raw string so that \D reaches the regex engine
# without triggering an invalid-escape warning.
calls_number = pd.wide_to_long(
    df_calls_features.reset_index(),
    i="participant_id",
    j="call_type",
    stubnames="no",
    sep="_",
    suffix=r"\D+",
)

# %%
sns.displot(calls_number, x="no", hue="call_type", binwidth=5, element="step", height=8)

# %%
# Same reshaping for the "duration_<suffix>" columns.
calls_duration = pd.wide_to_long(
    df_calls_features.reset_index(),
    i="participant_id",
    j="call_type",
    stubnames="duration",
    sep="_",
    suffix=r"\D+",
)
# Logarithmic x axis, linear y axis.
sns.displot(
    calls_duration,
    x="duration",
    hue="call_type",
    multiple="dodge",
    height=8,
    log_scale=(True, False),
)

# %% [markdown]
# ## Most frequent contacts by participant

# %%
df_calls_inactive = enumerate_contacts(df_calls_inactive)
df_calls_inactive.tail()

# %%
# Keep only rows with contact_id below 5.
# NOTE(review): presumably enumerate_contacts numbers contacts by descending
# frequency, so this selects the most frequent ones - confirm.
df_calls_frequent = df_calls_inactive.query("contact_id < 5")

# %%
sns.boxplot(x="contact_id", y="freq", data=df_calls_frequent)

# %% [markdown]
# # SMS data

# %%
df_sms_inactive = get_sms_data(participants_inactive_usernames)
df_sms_features = count_comms(df_sms_inactive)
df_sms_features.describe()

# %%
# Reshape the "no_<suffix>" SMS count columns to long format, keyed by
# "message_type", and plot their distribution.
sms_number = pd.wide_to_long(
    df_sms_features.reset_index(),
    i="participant_id",
    j="message_type",
    stubnames="no",
    sep="_",
    suffix=r"\D+",
)
sns.displot(
    sms_number, x="no", hue="message_type", binwidth=5, element="step", height=8
)