Add SMS data exploration and use Jupytext to save Jupyter notebooks as .py scripts.
parent
056db73786
commit
10bdc8aa1d
|
@ -2,6 +2,6 @@
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (straw2analysis)" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (straw2analysis)" project-jdk-type="Python SDK" />
|
||||||
<component name="PyCharmProfessionalAdvertiser">
|
<component name="PyCharmProfessionalAdvertiser">
|
||||||
<option name="shown" value="false" />
|
<option name="shown" value="true" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -8,7 +8,9 @@ dependencies:
|
||||||
- isort
|
- isort
|
||||||
- flake8
|
- flake8
|
||||||
- jupyterlab
|
- jupyterlab
|
||||||
|
- jupytext
|
||||||
- mypy
|
- mypy
|
||||||
|
- nodejs
|
||||||
- pandas
|
- pandas
|
||||||
- psycopg2
|
- psycopg2
|
||||||
- python-dotenv
|
- python-dotenv
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,126 @@
|
||||||
|
# ---
|
||||||
|
# jupyter:
|
||||||
|
# jupytext:
|
||||||
|
# formats: ipynb,py:percent
|
||||||
|
# text_representation:
|
||||||
|
# extension: .py
|
||||||
|
# format_name: percent
|
||||||
|
# format_version: '1.3'
|
||||||
|
# jupytext_version: 1.11.2
|
||||||
|
# kernelspec:
|
||||||
|
# display_name: straw2analysis
|
||||||
|
# language: python
|
||||||
|
# name: straw2analysis
|
||||||
|
# ---
|
||||||
|
|
||||||
|
# %%
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
# %%
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
nb_dir = os.path.split(os.getcwd())[0]
|
||||||
|
if nb_dir not in sys.path:
|
||||||
|
sys.path.append(nb_dir)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
from features.communication import *
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # Example of communication data and feature calculation
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_calls = get_call_data(["nokia_0000003"])
|
||||||
|
print(df_calls)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
count_comms(df_calls)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_sms = get_sms_data(["nokia_0000003"])
|
||||||
|
count_comms(df_sms)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # Call data
|
||||||
|
|
||||||
|
# %%
|
||||||
|
import participants.query_db
|
||||||
|
|
||||||
|
# %%
|
||||||
|
participants_inactive_usernames = participants.query_db.get_usernames()
|
||||||
|
df_calls_inactive = get_call_data(participants_inactive_usernames)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_calls_features = count_comms(df_calls_inactive)
|
||||||
|
df_calls_features.head()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_calls_features.describe()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Reshape the per-participant call features from wide to long format:
# each column named "no_<suffix>" is unpivoted into rows, with the
# non-numeric suffix stored in the new "call_type" index level and the
# value stored in the "no" column.
calls_number = pd.wide_to_long(
    df_calls_features.reset_index(),
    i="participant_id",
    j="call_type",
    stubnames="no",
    sep="_",
    # Raw string: "\D" is a regex class (non-digit), not a string escape.
    # A plain "\D+" literal triggers an invalid-escape warning.
    suffix=r"\D+",
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
sns.displot(calls_number, x="no", hue="call_type", binwidth=5, element="step", height=8)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Reshape the per-participant call features from wide to long format:
# each column named "duration_<suffix>" is unpivoted into rows, with the
# non-numeric suffix stored in the new "call_type" index level and the
# value stored in the "duration" column.
calls_duration = pd.wide_to_long(
    df_calls_features.reset_index(),
    i="participant_id",
    j="call_type",
    stubnames="duration",
    sep="_",
    # Raw string: "\D" is a regex class (non-digit), not a string escape.
    # A plain "\D+" literal triggers an invalid-escape warning.
    suffix=r"\D+",
)
|
||||||
|
sns.displot(
|
||||||
|
calls_duration,
|
||||||
|
x="duration",
|
||||||
|
hue="call_type",
|
||||||
|
multiple="dodge",
|
||||||
|
height=8,
|
||||||
|
log_scale=(True, False),
|
||||||
|
)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# ## Most frequent contacts by participant
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_calls_inactive = enumerate_contacts(df_calls_inactive)
|
||||||
|
df_calls_inactive.tail()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_calls_frequent = df_calls_inactive.query("contact_id < 5")
|
||||||
|
|
||||||
|
# %%
|
||||||
|
sns.boxplot(x="contact_id", y="freq", data=df_calls_frequent)
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# # SMS data
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_sms_inactive = get_sms_data(participants_inactive_usernames)
|
||||||
|
df_sms_features = count_comms(df_sms_inactive)
|
||||||
|
df_sms_features.describe()
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Reshape the per-participant SMS features from wide to long format:
# each column named "no_<suffix>" is unpivoted into rows, with the
# non-numeric suffix stored in the new "message_type" index level and the
# value stored in the "no" column.
sms_number = pd.wide_to_long(
    df_sms_features.reset_index(),
    i="participant_id",
    j="message_type",
    stubnames="no",
    sep="_",
    # Raw string: "\D" is a regex class (non-digit), not a string escape.
    # A plain "\D+" literal triggers an invalid-escape warning.
    suffix=r"\D+",
)
|
||||||
|
sns.displot(
|
||||||
|
sms_number, x="no", hue="message_type", binwidth=5, element="step", height=8
|
||||||
|
)
|
|
@ -231,6 +231,9 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"jupytext": {
|
||||||
|
"formats": "ipynb,auto:percent"
|
||||||
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "straw2analysis",
|
"display_name": "straw2analysis",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
# ---
|
||||||
|
# jupyter:
|
||||||
|
# jupytext:
|
||||||
|
# formats: ipynb,py:percent
|
||||||
|
# text_representation:
|
||||||
|
# extension: .py
|
||||||
|
# format_name: percent
|
||||||
|
# format_version: '1.3'
|
||||||
|
# jupytext_version: 1.11.2
|
||||||
|
# kernelspec:
|
||||||
|
# display_name: straw2analysis
|
||||||
|
# language: python
|
||||||
|
# name: straw2analysis
|
||||||
|
# ---
|
||||||
|
|
||||||
|
# %%
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from tabulate import tabulate
|
||||||
|
|
||||||
|
nb_dir = os.path.split(os.getcwd())[0]
|
||||||
|
if nb_dir not in sys.path:
|
||||||
|
sys.path.append(nb_dir)
|
||||||
|
|
||||||
|
import participants.query_db
|
||||||
|
|
||||||
|
# %%
|
||||||
|
from features.screen import *
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_screen_nokia = get_screen_data(["nokia_0000003"])
|
||||||
|
|
||||||
|
# %%
|
||||||
|
print(df_screen_nokia)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
participants_inactive_usernames = participants.query_db.get_usernames()
|
||||||
|
df_screen_inactive = get_screen_data(participants_inactive_usernames)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
df_screen_inactive["screen_status"] = (
|
||||||
|
df_screen_inactive["screen_status"]
|
||||||
|
.astype("category")
|
||||||
|
.cat.rename_categories(screen_status)
|
||||||
|
)
|
||||||
|
screen_freq = df_screen_inactive.value_counts("screen_status")
|
||||||
|
tabulate(screen_freq.to_frame(), tablefmt="html")
|
||||||
|
|
||||||
|
# %%
|
||||||
|
screen_status
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# A typical sequence might be: off -> locked -> on -> unlocked (0 -> 2 -> 1 -> 3)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
status_diff = df_screen_nokia.sort_values("timestamp")["screen_status"].diff()
|
||||||
|
status_diff.value_counts().to_frame()
|
||||||
|
|
||||||
|
# %% [markdown]
|
||||||
|
# But I have also seen off -> on -> unlocked (with 2 - locked missing) and off -> locked -> on -> off -> locked (*again*).
|
|
@ -2,7 +2,7 @@ from typing import List
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from config.models import Screen, Participant
|
from config.models import Participant, Screen
|
||||||
from setup import db_engine, session
|
from setup import db_engine, session
|
||||||
|
|
||||||
screen_status = {0: "off", 1: "on", 2: "locked", 3: "unlocked"}
|
screen_status = {0: "off", 1: "on", 2: "locked", 3: "unlocked"}
|
||||||
|
|
Loading…
Reference in New Issue