additional communication features

communication
Ivan Kobe 2021-08-04 13:45:54 +02:00 committed by junos
parent d8899fa75b
commit 4ac5f37c19
5 changed files with 1505 additions and 0 deletions

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,393 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Put the parent of the current working directory on sys.path so that the\n",
"# project-local packages imported below (features, participants) resolve.\n",
"# Assumes the notebook is started from a direct subdirectory of the repo root.\n",
"nb_dir = os.path.split(os.getcwd())[0]\n",
"if nb_dir not in sys.path:\n",
"    sys.path.append(nb_dir)\n",
"\n",
"# Explicit names instead of a wildcard import, so it is visible where each\n",
"# helper used in this notebook comes from.\n",
"from features.communication import count_comms, get_call_data, get_sms_data\n",
"import participants.query_db\n",
"\n",
"# NOTE(review): the variable name says \"inactive\", but get_usernames() is called\n",
"# without arguments here - confirm which participants it returns by default.\n",
"participants_inactive_usernames = participants.query_db.get_usernames()\n",
"df_sms = get_sms_data(participants_inactive_usernames)\n",
"df_calls = get_call_data(participants_inactive_usernames)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Run count_comms() on the call data and on the SMS data, then join the two\n",
"# results on participant_id. Column names that occur in both results get a\n",
"# _calls / _sms suffix.\n",
"# NOTE(review): DataFrame.merge defaults to an inner join, so a participant\n",
"# missing from either result is dropped - confirm that this is intended.\n",
"count_calls = count_comms(df_calls)\n",
"count_sms = count_comms(df_sms)\n",
"\n",
"count_joined = count_calls.merge(\n",
"    count_sms, on=\"participant_id\", suffixes=(\"_calls\", \"_sms\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>participant_id</th>\n",
" <th>no_incoming</th>\n",
" <th>no_outgoing</th>\n",
" <th>no_missed</th>\n",
" <th>no_all_calls</th>\n",
" <th>no_incoming_ratio</th>\n",
" <th>no_outgoing_ratio</th>\n",
" <th>duration_total_incoming</th>\n",
" <th>duration_total_outgoing</th>\n",
" <th>duration_max_incoming</th>\n",
" <th>duration_max_outgoing</th>\n",
" <th>no_received</th>\n",
" <th>no_sent</th>\n",
" <th>no_all_sms</th>\n",
" <th>no_received_ratio</th>\n",
" <th>no_sent_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13</td>\n",
" <td>3.0</td>\n",
" <td>21.0</td>\n",
" <td>2.0</td>\n",
" <td>26.0</td>\n",
" <td>0.115385</td>\n",
" <td>0.807692</td>\n",
" <td>342.0</td>\n",
" <td>2836.0</td>\n",
" <td>196.0</td>\n",
" <td>355.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" <td>14.0</td>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14</td>\n",
" <td>16.0</td>\n",
" <td>22.0</td>\n",
" <td>11.0</td>\n",
" <td>49.0</td>\n",
" <td>0.326531</td>\n",
" <td>0.448980</td>\n",
" <td>1873.0</td>\n",
" <td>2789.0</td>\n",
" <td>346.0</td>\n",
" <td>694.0</td>\n",
" <td>20.0</td>\n",
" <td>14.0</td>\n",
" <td>34.0</td>\n",
" <td>0.588235</td>\n",
" <td>0.411765</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>15</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>0.600000</td>\n",
" <td>0.400000</td>\n",
" <td>310.0</td>\n",
" <td>19.0</td>\n",
" <td>154.0</td>\n",
" <td>19.0</td>\n",
" <td>73.0</td>\n",
" <td>73.0</td>\n",
" <td>146.0</td>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>13.0</td>\n",
" <td>0.307692</td>\n",
" <td>0.461538</td>\n",
" <td>1963.0</td>\n",
" <td>849.0</td>\n",
" <td>1037.0</td>\n",
" <td>638.0</td>\n",
" <td>8.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>0.800000</td>\n",
" <td>0.200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>17</td>\n",
" <td>20.0</td>\n",
" <td>60.0</td>\n",
" <td>8.0</td>\n",
" <td>88.0</td>\n",
" <td>0.227273</td>\n",
" <td>0.681818</td>\n",
" <td>5789.0</td>\n",
" <td>17046.0</td>\n",
" <td>1966.0</td>\n",
" <td>3830.0</td>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>0.875000</td>\n",
" <td>0.125000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>91</td>\n",
" <td>15.0</td>\n",
" <td>13.0</td>\n",
" <td>3.0</td>\n",
" <td>31.0</td>\n",
" <td>0.483871</td>\n",
" <td>0.419355</td>\n",
" <td>3443.0</td>\n",
" <td>3636.0</td>\n",
" <td>644.0</td>\n",
" <td>1315.0</td>\n",
" <td>83.0</td>\n",
" <td>44.0</td>\n",
" <td>127.0</td>\n",
" <td>0.653543</td>\n",
" <td>0.346457</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>92</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>0.375000</td>\n",
" <td>0.500000</td>\n",
" <td>231.0</td>\n",
" <td>648.0</td>\n",
" <td>167.0</td>\n",
" <td>433.0</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>10.0</td>\n",
" <td>0.400000</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>93</td>\n",
" <td>22.0</td>\n",
" <td>20.0</td>\n",
" <td>9.0</td>\n",
" <td>51.0</td>\n",
" <td>0.431373</td>\n",
" <td>0.392157</td>\n",
" <td>2534.0</td>\n",
" <td>1444.0</td>\n",
" <td>443.0</td>\n",
" <td>672.0</td>\n",
" <td>48.0</td>\n",
" <td>19.0</td>\n",
" <td>67.0</td>\n",
" <td>0.716418</td>\n",
" <td>0.283582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>106</td>\n",
" <td>12.0</td>\n",
" <td>30.0</td>\n",
" <td>6.0</td>\n",
" <td>48.0</td>\n",
" <td>0.250000</td>\n",
" <td>0.625000</td>\n",
" <td>3049.0</td>\n",
" <td>2637.0</td>\n",
" <td>878.0</td>\n",
" <td>380.0</td>\n",
" <td>10.0</td>\n",
" <td>10.0</td>\n",
" <td>20.0</td>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>107</td>\n",
" <td>11.0</td>\n",
" <td>42.0</td>\n",
" <td>13.0</td>\n",
" <td>66.0</td>\n",
" <td>0.166667</td>\n",
" <td>0.636364</td>\n",
" <td>3804.0</td>\n",
" <td>9977.0</td>\n",
" <td>1519.0</td>\n",
" <td>1943.0</td>\n",
" <td>80.0</td>\n",
" <td>176.0</td>\n",
" <td>256.0</td>\n",
" <td>0.312500</td>\n",
" <td>0.687500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>61 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" participant_id no_incoming no_outgoing no_missed no_all_calls \\\n",
"0 13 3.0 21.0 2.0 26.0 \n",
"1 14 16.0 22.0 11.0 49.0 \n",
"2 15 3.0 2.0 NaN 5.0 \n",
"3 16 4.0 6.0 3.0 13.0 \n",
"4 17 20.0 60.0 8.0 88.0 \n",
".. ... ... ... ... ... \n",
"56 91 15.0 13.0 3.0 31.0 \n",
"57 92 3.0 4.0 1.0 8.0 \n",
"58 93 22.0 20.0 9.0 51.0 \n",
"59 106 12.0 30.0 6.0 48.0 \n",
"60 107 11.0 42.0 13.0 66.0 \n",
"\n",
" no_incoming_ratio no_outgoing_ratio duration_total_incoming \\\n",
"0 0.115385 0.807692 342.0 \n",
"1 0.326531 0.448980 1873.0 \n",
"2 0.600000 0.400000 310.0 \n",
"3 0.307692 0.461538 1963.0 \n",
"4 0.227273 0.681818 5789.0 \n",
".. ... ... ... \n",
"56 0.483871 0.419355 3443.0 \n",
"57 0.375000 0.500000 231.0 \n",
"58 0.431373 0.392157 2534.0 \n",
"59 0.250000 0.625000 3049.0 \n",
"60 0.166667 0.636364 3804.0 \n",
"\n",
" duration_total_outgoing duration_max_incoming duration_max_outgoing \\\n",
"0 2836.0 196.0 355.0 \n",
"1 2789.0 346.0 694.0 \n",
"2 19.0 154.0 19.0 \n",
"3 849.0 1037.0 638.0 \n",
"4 17046.0 1966.0 3830.0 \n",
".. ... ... ... \n",
"56 3636.0 644.0 1315.0 \n",
"57 648.0 167.0 433.0 \n",
"58 1444.0 443.0 672.0 \n",
"59 2637.0 878.0 380.0 \n",
"60 9977.0 1519.0 1943.0 \n",
"\n",
" no_received no_sent no_all_sms no_received_ratio no_sent_ratio \n",
"0 7.0 7.0 14.0 0.500000 0.500000 \n",
"1 20.0 14.0 34.0 0.588235 0.411765 \n",
"2 73.0 73.0 146.0 0.500000 0.500000 \n",
"3 8.0 2.0 10.0 0.800000 0.200000 \n",
"4 7.0 1.0 8.0 0.875000 0.125000 \n",
".. ... ... ... ... ... \n",
"56 83.0 44.0 127.0 0.653543 0.346457 \n",
"57 4.0 6.0 10.0 0.400000 0.600000 \n",
"58 48.0 19.0 67.0 0.716418 0.283582 \n",
"59 10.0 10.0 20.0 0.500000 0.500000 \n",
"60 80.0 176.0 256.0 0.312500 0.687500 \n",
"\n",
"[61 rows x 16 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count_joined.reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "straw2analysis",
"language": "python",
"name": "straw2analysis"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

315
Untitled.ipynb 100644
View File

@ -0,0 +1,315 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Put the parent of the current working directory on sys.path so that the\n",
"# project-local packages imported below (features, participants) resolve.\n",
"# Assumes the notebook is started from a direct subdirectory of the repo root.\n",
"nb_dir = os.path.split(os.getcwd())[0]\n",
"if nb_dir not in sys.path:\n",
"    sys.path.append(nb_dir)\n",
"\n",
"# Explicit names instead of a wildcard import, so it is visible where each\n",
"# helper used in this notebook comes from.\n",
"from features.communication import (\n",
"    contact_features,\n",
"    enumerate_contacts,\n",
"    get_call_data,\n",
"    get_sms_data,\n",
")\n",
"import participants.query_db\n",
"\n",
"# NOTE(review): the variable name says \"inactive\", but get_usernames() is called\n",
"# without arguments here - confirm which participants it returns by default.\n",
"participants_inactive_usernames = participants.query_db.get_usernames()\n",
"df_sms = get_sms_data(participants_inactive_usernames)\n",
"df_calls = get_call_data(participants_inactive_usernames)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_calls"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>_id</th>\n",
" <th>timestamp</th>\n",
" <th>device_id</th>\n",
" <th>call_type</th>\n",
" <th>call_duration</th>\n",
" <th>trace</th>\n",
" <th>participant_id</th>\n",
" <th>username</th>\n",
" <th>freq</th>\n",
" <th>contact_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>118</td>\n",
" <td>1</td>\n",
" <td>1581936081010</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>176</td>\n",
" <td>3</td>\n",
" <td>1582295247982</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>1</td>\n",
" <td>196</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>209</td>\n",
" <td>4</td>\n",
" <td>1582305634014</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>237</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>210</td>\n",
" <td>5</td>\n",
" <td>1582561530334</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>126</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>253</td>\n",
" <td>6</td>\n",
" <td>1582627576077</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>255</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4645</th>\n",
" <td>5874</td>\n",
" <td>131</td>\n",
" <td>1625602519843</td>\n",
" <td>4b62a655-cbf0-4ac0-a448-06726f45b56a</td>\n",
" <td>2</td>\n",
" <td>2270</td>\n",
" <td>f4ebca8dc7305fe424d6bf7fbcd2e5086f98b453</td>\n",
" <td>90</td>\n",
" <td>uploader_53573</td>\n",
" <td>6</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4646</th>\n",
" <td>5882</td>\n",
" <td>139</td>\n",
" <td>1625753023456</td>\n",
" <td>4b62a655-cbf0-4ac0-a448-06726f45b56a</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>2e5d63f6fddca2b66be810b5946c42eda24f2dbe</td>\n",
" <td>90</td>\n",
" <td>uploader_53573</td>\n",
" <td>2</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4647</th>\n",
" <td>5883</td>\n",
" <td>140</td>\n",
" <td>1625754998767</td>\n",
" <td>4b62a655-cbf0-4ac0-a448-06726f45b56a</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>2e5d63f6fddca2b66be810b5946c42eda24f2dbe</td>\n",
" <td>90</td>\n",
" <td>uploader_53573</td>\n",
" <td>2</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4648</th>\n",
" <td>5884</td>\n",
" <td>141</td>\n",
" <td>1625823008392</td>\n",
" <td>4b62a655-cbf0-4ac0-a448-06726f45b56a</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>7316d58b7bb7de097a2421c56010ac024a489451</td>\n",
" <td>90</td>\n",
" <td>uploader_53573</td>\n",
" <td>1</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4649</th>\n",
" <td>5903</td>\n",
" <td>158</td>\n",
" <td>1626110930233</td>\n",
" <td>4b62a655-cbf0-4ac0-a448-06726f45b56a</td>\n",
" <td>2</td>\n",
" <td>53</td>\n",
" <td>7db4e9acf7c73837ddecdae5da523a28c774ba94</td>\n",
" <td>90</td>\n",
" <td>uploader_53573</td>\n",
" <td>1</td>\n",
" <td>24</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4650 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id _id timestamp device_id \\\n",
"0 118 1 1581936081010 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"1 176 3 1582295247982 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"2 209 4 1582305634014 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"3 210 5 1582561530334 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"4 253 6 1582627576077 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"... ... ... ... ... \n",
"4645 5874 131 1625602519843 4b62a655-cbf0-4ac0-a448-06726f45b56a \n",
"4646 5882 139 1625753023456 4b62a655-cbf0-4ac0-a448-06726f45b56a \n",
"4647 5883 140 1625754998767 4b62a655-cbf0-4ac0-a448-06726f45b56a \n",
"4648 5884 141 1625823008392 4b62a655-cbf0-4ac0-a448-06726f45b56a \n",
"4649 5903 158 1626110930233 4b62a655-cbf0-4ac0-a448-06726f45b56a \n",
"\n",
" call_type call_duration trace \\\n",
"0 2 0 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"1 1 196 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"2 2 237 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"3 2 126 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"4 2 255 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"... ... ... ... \n",
"4645 2 2270 f4ebca8dc7305fe424d6bf7fbcd2e5086f98b453 \n",
"4646 3 0 2e5d63f6fddca2b66be810b5946c42eda24f2dbe \n",
"4647 3 0 2e5d63f6fddca2b66be810b5946c42eda24f2dbe \n",
"4648 2 0 7316d58b7bb7de097a2421c56010ac024a489451 \n",
"4649 2 53 7db4e9acf7c73837ddecdae5da523a28c774ba94 \n",
"\n",
" participant_id username freq contact_id \n",
"0 13 uploader_20449 21 0 \n",
"1 13 uploader_20449 21 0 \n",
"2 13 uploader_20449 21 0 \n",
"3 13 uploader_20449 21 0 \n",
"4 13 uploader_20449 21 0 \n",
"... ... ... ... ... \n",
"4645 90 uploader_53573 6 9 \n",
"4646 90 uploader_53573 2 13 \n",
"4647 90 uploader_53573 2 13 \n",
"4648 90 uploader_53573 1 21 \n",
"4649 90 uploader_53573 1 24 \n",
"\n",
"[4650 rows x 11 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"contact_features(enumerate_contacts(df_calls))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "straw2analysis",
"language": "python",
"name": "straw2analysis"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

788
Untitled1.ipynb 100644
View File

@ -0,0 +1,788 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Put the parent of the current working directory on sys.path so that the\n",
"# project-local packages imported below (features, participants) resolve.\n",
"# Assumes the notebook is started from a direct subdirectory of the repo root.\n",
"nb_dir = os.path.split(os.getcwd())[0]\n",
"if nb_dir not in sys.path:\n",
"    sys.path.append(nb_dir)\n",
"\n",
"# Explicit names instead of a wildcard import, so it is visible where each\n",
"# helper used in this notebook comes from.\n",
"from features.communication import (\n",
"    calls_sms_features,\n",
"    contact_features,\n",
"    enumerate_contacts,\n",
"    get_call_data,\n",
"    get_sms_data,\n",
")\n",
"import participants.query_db\n",
"\n",
"# NOTE(review): the variable name says \"inactive\", but get_usernames() is called\n",
"# without arguments here - confirm which participants it returns by default.\n",
"participants_inactive_usernames = participants.query_db.get_usernames()\n",
"df_sms = get_sms_data(participants_inactive_usernames)\n",
"df_calls = get_call_data(participants_inactive_usernames)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>_id</th>\n",
" <th>timestamp</th>\n",
" <th>device_id</th>\n",
" <th>call_type</th>\n",
" <th>call_duration</th>\n",
" <th>trace</th>\n",
" <th>participant_id</th>\n",
" <th>username</th>\n",
" <th>freq</th>\n",
" <th>contact_id</th>\n",
" <th>total_call_duration</th>\n",
" <th>no_contacts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>118</td>\n",
" <td>1</td>\n",
" <td>1581936081010</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2844</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>176</td>\n",
" <td>3</td>\n",
" <td>1582295247982</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>1</td>\n",
" <td>196</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2844</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>209</td>\n",
" <td>4</td>\n",
" <td>1582305634014</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>237</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2844</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>210</td>\n",
" <td>5</td>\n",
" <td>1582561530334</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>126</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2844</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>253</td>\n",
" <td>6</td>\n",
" <td>1582627576077</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>255</td>\n",
" <td>87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2844</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4352</th>\n",
" <td>5560</td>\n",
" <td>59</td>\n",
" <td>1621275689589</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>5eb72fe829c2af4a654007220119bdcf47499555</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4336</th>\n",
" <td>5443</td>\n",
" <td>29</td>\n",
" <td>1620746142636</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>2</td>\n",
" <td>189</td>\n",
" <td>6dd761532337dfe596eb2e34f4c91216b38e28e2</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>18</td>\n",
" <td>189</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4316</th>\n",
" <td>5237</td>\n",
" <td>10</td>\n",
" <td>1620140109908</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>2</td>\n",
" <td>85</td>\n",
" <td>9c4eab1dfc0114aecd64a7f594977acc9ab7936c</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>19</td>\n",
" <td>85</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4347</th>\n",
" <td>5524</td>\n",
" <td>44</td>\n",
" <td>1620971679122</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>1</td>\n",
" <td>120</td>\n",
" <td>a9fa73b6137d09288429de20172095978730e4b8</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>20</td>\n",
" <td>120</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4331</th>\n",
" <td>5364</td>\n",
" <td>26</td>\n",
" <td>1620630328635</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>2</td>\n",
" <td>184</td>\n",
" <td>cfe98eee4a27b377f4cde1ea5c39d24d0475b533</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>21</td>\n",
" <td>184</td>\n",
" <td>22</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4650 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" id _id timestamp device_id \\\n",
"0 118 1 1581936081010 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"1 176 3 1582295247982 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"2 209 4 1582305634014 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"3 210 5 1582561530334 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"4 253 6 1582627576077 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"... ... ... ... ... \n",
"4352 5560 59 1621275689589 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"4336 5443 29 1620746142636 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"4316 5237 10 1620140109908 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"4347 5524 44 1620971679122 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"4331 5364 26 1620630328635 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"\n",
" call_type call_duration trace \\\n",
"0 2 0 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"1 1 196 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"2 2 237 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"3 2 126 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"4 2 255 87ae5eb2c5b7fe30bea2821e2ec052453d89ea6b \n",
"... ... ... ... \n",
"4352 2 0 5eb72fe829c2af4a654007220119bdcf47499555 \n",
"4336 2 189 6dd761532337dfe596eb2e34f4c91216b38e28e2 \n",
"4316 2 85 9c4eab1dfc0114aecd64a7f594977acc9ab7936c \n",
"4347 1 120 a9fa73b6137d09288429de20172095978730e4b8 \n",
"4331 2 184 cfe98eee4a27b377f4cde1ea5c39d24d0475b533 \n",
"\n",
" participant_id username freq contact_id total_call_duration \\\n",
"0 13 uploader_20449 21 0 2844 \n",
"1 13 uploader_20449 21 0 2844 \n",
"2 13 uploader_20449 21 0 2844 \n",
"3 13 uploader_20449 21 0 2844 \n",
"4 13 uploader_20449 21 0 2844 \n",
"... ... ... ... ... ... \n",
"4352 107 uploader_89606 1 17 0 \n",
"4336 107 uploader_89606 1 18 189 \n",
"4316 107 uploader_89606 1 19 85 \n",
"4347 107 uploader_89606 1 20 120 \n",
"4331 107 uploader_89606 1 21 184 \n",
"\n",
" no_contacts \n",
"0 5 \n",
"1 5 \n",
"2 5 \n",
"3 5 \n",
"4 5 \n",
"... ... \n",
"4352 22 \n",
"4336 22 \n",
"4316 22 \n",
"4347 22 \n",
"4331 22 \n",
"\n",
"[4650 rows x 13 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"contact_features(enumerate_contacts(df_calls))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>_id</th>\n",
" <th>timestamp</th>\n",
" <th>device_id</th>\n",
" <th>message_type</th>\n",
" <th>trace</th>\n",
" <th>participant_id</th>\n",
" <th>username</th>\n",
" <th>freq</th>\n",
" <th>contact_id</th>\n",
" <th>no_contacts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>379</td>\n",
" <td>7</td>\n",
" <td>1582964434597</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>417b9c87f5b573530bcffba8577777b3a964d671</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>380</td>\n",
" <td>8</td>\n",
" <td>1582964434974</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>417b9c87f5b573530bcffba8577777b3a964d671</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>382</td>\n",
" <td>10</td>\n",
" <td>1582965988609</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>417b9c87f5b573530bcffba8577777b3a964d671</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>383</td>\n",
" <td>11</td>\n",
" <td>1582965988873</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>417b9c87f5b573530bcffba8577777b3a964d671</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>396</td>\n",
" <td>12</td>\n",
" <td>1582965988873</td>\n",
" <td>78082f9f-98c2-468d-b4a2-7c835bd812bd</td>\n",
" <td>2</td>\n",
" <td>417b9c87f5b573530bcffba8577777b3a964d671</td>\n",
" <td>13</td>\n",
" <td>uploader_20449</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5193</th>\n",
" <td>6137</td>\n",
" <td>4</td>\n",
" <td>1619789360665</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>2</td>\n",
" <td>2340c1d2b9e5d550373423a599014468a4dc3678</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>3</td>\n",
" <td>12</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5194</th>\n",
" <td>6135</td>\n",
" <td>2</td>\n",
" <td>1619787273829</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>1</td>\n",
" <td>2340c1d2b9e5d550373423a599014468a4dc3678</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>3</td>\n",
" <td>12</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5417</th>\n",
" <td>6690</td>\n",
" <td>210</td>\n",
" <td>1620980437198</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>1</td>\n",
" <td>198d7e63a2b4d8a7ca9bb92f74e6974ca17edc56</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5447</th>\n",
" <td>6770</td>\n",
" <td>256</td>\n",
" <td>1621407668019</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>1</td>\n",
" <td>d4a67b53e704247de47064850efd3647e8dcaffb</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>14</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5440</th>\n",
" <td>6742</td>\n",
" <td>248</td>\n",
" <td>1621253313544</td>\n",
" <td>9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e</td>\n",
" <td>1</td>\n",
" <td>ffbc6a5f0f601cf2d9cdad9d3a588633e1a1967c</td>\n",
" <td>107</td>\n",
" <td>uploader_89606</td>\n",
" <td>1</td>\n",
" <td>15</td>\n",
" <td>16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5864 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" id _id timestamp device_id \\\n",
"195 379 7 1582964434597 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"196 380 8 1582964434974 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"197 382 10 1582965988609 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"198 383 11 1582965988873 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"199 396 12 1582965988873 78082f9f-98c2-468d-b4a2-7c835bd812bd \n",
"... ... ... ... ... \n",
"5193 6137 4 1619789360665 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"5194 6135 2 1619787273829 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"5417 6690 210 1620980437198 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"5447 6770 256 1621407668019 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"5440 6742 248 1621253313544 9f54e35c-d7cb-4f4c-8dc1-17dc86f2635e \n",
"\n",
" message_type trace participant_id \\\n",
"195 2 417b9c87f5b573530bcffba8577777b3a964d671 13 \n",
"196 2 417b9c87f5b573530bcffba8577777b3a964d671 13 \n",
"197 2 417b9c87f5b573530bcffba8577777b3a964d671 13 \n",
"198 2 417b9c87f5b573530bcffba8577777b3a964d671 13 \n",
"199 2 417b9c87f5b573530bcffba8577777b3a964d671 13 \n",
"... ... ... ... \n",
"5193 2 2340c1d2b9e5d550373423a599014468a4dc3678 107 \n",
"5194 1 2340c1d2b9e5d550373423a599014468a4dc3678 107 \n",
"5417 1 198d7e63a2b4d8a7ca9bb92f74e6974ca17edc56 107 \n",
"5447 1 d4a67b53e704247de47064850efd3647e8dcaffb 107 \n",
"5440 1 ffbc6a5f0f601cf2d9cdad9d3a588633e1a1967c 107 \n",
"\n",
" username freq contact_id no_contacts \n",
"195 uploader_20449 5 0 6 \n",
"196 uploader_20449 5 0 6 \n",
"197 uploader_20449 5 0 6 \n",
"198 uploader_20449 5 0 6 \n",
"199 uploader_20449 5 0 6 \n",
"... ... ... ... ... \n",
"5193 uploader_89606 3 12 16 \n",
"5194 uploader_89606 3 12 16 \n",
"5417 uploader_89606 1 13 16 \n",
"5447 uploader_89606 1 14 16 \n",
"5440 uploader_89606 1 15 16 \n",
"\n",
"[5864 rows x 11 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"contact_features(enumerate_contacts(df_sms))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>participant_id</th>\n",
" <th>no_calls_no_sms_ratio</th>\n",
" <th>no_incoming_calls_no_recieved_sms_ratio</th>\n",
" <th>no_outgoing_calls_no_sent_sms_ratio</th>\n",
" <th>no_calls_contacts_no_sms_contacts_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13</td>\n",
" <td>0.650000</td>\n",
" <td>0.700000</td>\n",
" <td>0.750000</td>\n",
" <td>0.454545</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>14</td>\n",
" <td>0.590361</td>\n",
" <td>0.555556</td>\n",
" <td>0.611111</td>\n",
" <td>0.714286</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>15</td>\n",
" <td>0.033113</td>\n",
" <td>0.960526</td>\n",
" <td>0.026667</td>\n",
" <td>0.173913</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>16</td>\n",
" <td>0.565217</td>\n",
" <td>0.666667</td>\n",
" <td>0.750000</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>17</td>\n",
" <td>0.916667</td>\n",
" <td>0.259259</td>\n",
" <td>0.983607</td>\n",
" <td>0.857143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>91</td>\n",
" <td>0.196203</td>\n",
" <td>0.846939</td>\n",
" <td>0.228070</td>\n",
" <td>0.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>92</td>\n",
" <td>0.444444</td>\n",
" <td>0.571429</td>\n",
" <td>0.400000</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>93</td>\n",
" <td>0.432203</td>\n",
" <td>0.685714</td>\n",
" <td>0.512821</td>\n",
" <td>0.428571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>106</td>\n",
" <td>0.705882</td>\n",
" <td>0.454545</td>\n",
" <td>0.750000</td>\n",
" <td>0.769231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>107</td>\n",
" <td>0.204969</td>\n",
" <td>0.879121</td>\n",
" <td>0.192661</td>\n",
" <td>0.578947</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>61 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" participant_id no_calls_no_sms_ratio \\\n",
"0 13 0.650000 \n",
"1 14 0.590361 \n",
"2 15 0.033113 \n",
"3 16 0.565217 \n",
"4 17 0.916667 \n",
".. ... ... \n",
"56 91 0.196203 \n",
"57 92 0.444444 \n",
"58 93 0.432203 \n",
"59 106 0.705882 \n",
"60 107 0.204969 \n",
"\n",
" no_incoming_calls_no_recieved_sms_ratio \\\n",
"0 0.700000 \n",
"1 0.555556 \n",
"2 0.960526 \n",
"3 0.666667 \n",
"4 0.259259 \n",
".. ... \n",
"56 0.846939 \n",
"57 0.571429 \n",
"58 0.685714 \n",
"59 0.454545 \n",
"60 0.879121 \n",
"\n",
" no_outgoing_calls_no_sent_sms_ratio \\\n",
"0 0.750000 \n",
"1 0.611111 \n",
"2 0.026667 \n",
"3 0.750000 \n",
"4 0.983607 \n",
".. ... \n",
"56 0.228070 \n",
"57 0.400000 \n",
"58 0.512821 \n",
"59 0.750000 \n",
"60 0.192661 \n",
"\n",
" no_calls_contacts_no_sms_contacts_ratio \n",
"0 0.454545 \n",
"1 0.714286 \n",
"2 0.173913 \n",
"3 0.666667 \n",
"4 0.857143 \n",
".. ... \n",
"56 0.666667 \n",
"57 0.600000 \n",
"58 0.428571 \n",
"59 0.769231 \n",
"60 0.578947 \n",
"\n",
"[61 rows x 5 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"calls_sms_features(df_calls, df_sms)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "straw2analysis",
"language": "python",
"name": "straw2analysis"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -40,6 +40,9 @@ print(df_calls)
# %% # %%
count_comms(df_calls) count_comms(df_calls)
# %%
enumerate_contacts(df_calls)
# %% # %%
df_sms = get_sms_data(["nokia_0000003"]) df_sms = get_sms_data(["nokia_0000003"])
count_comms(df_sms) count_comms(df_sms)