diff --git a/dev/developers/testing/index.html b/dev/developers/testing/index.html
index 42992185..1b34bf9c 100644
--- a/dev/developers/testing/index.html
+++ b/dev/developers/testing/index.html
@@ -1563,13 +1563,6 @@
     How do we execute the tests?
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#what-cases-do-we-test" class="md-nav__link">
-    What cases do we test?
-  </a>
-  
 </li>
       
     </ul>
@@ -1764,13 +1757,6 @@
     How do we execute the tests?
   </a>
   
-</li>
-      
-        <li class="md-nav__item">
-  <a href="#what-cases-do-we-test" class="md-nav__link">
-    What cases do we test?
-  </a>
-  
 </li>
       
     </ul>
@@ -2014,1227 +2000,6 @@ FAIL
 </code></pre></div>
 </td></tr></table>
 </details>
-<h2 id="what-cases-do-we-test">What cases do we test?<a class="headerlink" href="#what-cases-do-we-test" title="Permanent link">&para;</a></h2>
-<p>The sample data includes 7 tests cases. Take phone battery as an example, on this platform, battery status 2 represents <code>charging</code> and battery status 4 represents <code>discharge</code>. </p>
-<details><summary>1. A daily segment instance with no battery episodes</summary><details><summary>Example</summary><p>Input time segments:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>00:08:10.415</td>
-<td>per_ios</td>
-<td>4</td>
-<td>80</td>
-<td>100</td>
-<td>4170</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>00:17:38.602</td>
-<td>per_ios</td>
-<td>4</td>
-<td>77</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>03:20:30.415</td>
-<td>per_ios</td>
-<td>2</td>
-<td>77</td>
-<td>100</td>
-<td>4170</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>03:30:35.875</td>
-<td>per_ios</td>
-<td>2</td>
-<td>80</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>00:00:00,00:29:59</td>
-<td>thirtyminutes0000</td>
-<td>2020-07-01 00:00:00</td>
-<td>2020-07-01 00:29:59</td>
-<td>1</td>
-<td>21.8259833333333</td>
-<td>0.137450851775292</td>
-<td>0.137450851775292</td>
-<td>0</td>
-<td>0</td>
-</tr>
-<tr>
-<td>00:03:00,03:29:59</td>
-<td>thirtyminutes0006</td>
-<td>2020-07-01 03:00:00</td>
-<td>2020-07-01 03:29:59</td>
-<td>0</td>
-<td>0</td>
-<td>0</td>
-<td>0</td>
-<td>1</td>
-<td>9.49288333333333</td>
-</tr>
-</tbody>
-</table>
-<p>Since there is no battery episode between 00:00:30 and 03:00:00, no result will be generated for this epoch.</p>
-</details>
-</details>
-<details><summary>2. A daily segment instance with two battery episodes (one charging, one discharge)</summary><details><summary>Periodic (daily)</summary><p>Input time segments:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>17:59:41.434</td>
-<td>per_ios</td>
-<td>4</td>
-<td>59</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>18:04:14.321</td>
-<td>per_ios</td>
-<td>4</td>
-<td>58</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>18:07:24.456</td>
-<td>per_ios</td>
-<td>4</td>
-<td>57</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>20:03:03.415</td>
-<td>per_ios</td>
-<td>2</td>
-<td>72</td>
-<td>100</td>
-<td>4170</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>20:05:12.434</td>
-<td>per_ios</td>
-<td>2</td>
-<td>73</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>20:07:24.678</td>
-<td>per_ios</td>
-<td>2</td>
-<td>74</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>20:10:34.875</td>
-<td>per_ios</td>
-<td>2</td>
-<td>75</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>21:30:04.415</td>
-<td>per_ios</td>
-<td>4</td>
-<td>74</td>
-<td>100</td>
-<td>4170</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>21:32:14.434</td>
-<td>per_ios</td>
-<td>4</td>
-<td>73</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>21:35:23.678</td>
-<td>per_ios</td>
-<td>4</td>
-<td>72</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>21:37:47.875</td>
-<td>per_ios</td>
-<td>4</td>
-<td>71</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results:</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>18:00:00,23:59:59</td>
-<td>evening</td>
-<td>2020-07-01 18:00:00</td>
-<td>2020-07-01 23:59:59</td>
-<td>2</td>
-<td>75.1306166666666</td>
-<td>0.0664958369201784</td>
-<td>0.079525673538274</td>
-<td>1</td>
-<td>37.5236666666667</td>
-</tr>
-</tbody>
-</table>
-</details>
-<details><summary>Frequency (30 mins)</summary><p>Input time segments:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>20:10:34.875</td>
-<td>fre_ios</td>
-<td>2</td>
-<td>75</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>20:20:17.171</td>
-<td>fre_ios</td>
-<td>4</td>
-<td>74</td>
-<td>100</td>
-<td>4170</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>20:00:00,20:29:59</td>
-<td>thirtyminutes0040</td>
-<td>2020-07-01 20:00:00</td>
-<td>2020-07-01 20:29:59</td>
-<td>1</td>
-<td>14.6351666666667</td>
-<td>0.0683285693136395</td>
-<td>0.0683285693136395</td>
-<td>1</td>
-<td>12.3074</td>
-</tr>
-</tbody>
-</table>
-</details>
-</details>
-<details><summary>3. A daily segment instance with a charging episode that spans to the next daily instance</summary><details><summary>Periodic (daily)</summary><p>Input time segments:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>11:59:28.434</td>
-<td>per_ios</td>
-<td>2</td>
-<td>63</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>12:04:37.678</td>
-<td>per_ios</td>
-<td>2</td>
-<td>64</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-</details>
-<details><summary>Frequency (30 mins)</summary><p>Input time segements:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>11:59:28.434</td>
-<td>fre_ios</td>
-<td>2</td>
-<td>63</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>12:04:37.678</td>
-<td>fre_ios</td>
-<td>2</td>
-<td>64</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-</details>
-</details>
-<details><summary>4. A daily segment instance with a discharge episode that spans to the next daily instance</summary><details><summary>Periodic (daily)</summary><p>Input time segements:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>05:59:49.434</td>
-<td>per_ios</td>
-<td>4</td>
-<td>79</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>06:02:19.321</td>
-<td>per_ios</td>
-<td>4</td>
-<td>78</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-</details>
-<details><summary>Frequency (30 mins)</summary><p>Input time segements:</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>17:59:41.434</td>
-<td>fre_ios</td>
-<td>4</td>
-<td>59</td>
-<td>100</td>
-<td>4094</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>18:04:14.321</td>
-<td>fre_ios</td>
-<td>4</td>
-<td>58</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-</details>
-</details>
-<details><summary>5. Three-day segments that repeat everyday</summary><p><a href="../../setup/configuration/#time-segments">Time segment tested:</a></p>
-<table>
-<thead>
-<tr>
-<th>label</th>
-<th>start_time</th>
-<th>length</th>
-<th>repeats_on</th>
-<th>repeats_value</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>daily</td>
-<td>00:00:00</td>
-<td>23H 59M 59S</td>
-<td>every_day</td>
-<td>0</td>
-</tr>
-</tbody>
-</table>
-<p>Data tested:</p>
-<p>We test 14 segments, one at the beginning of the first day, one at the end of the last day</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>2020-07-02 00:03:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>63</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-02 00:05:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>62</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-02 23:55:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>55</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-02 23:59:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>54</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 00:06:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>53</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 00:09:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>52</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 23:47:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>60</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 23:55:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>59</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 00:15:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>58</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 00:18:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>57</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 23:51:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>41</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 23:57:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>40</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-05 00:21:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>39</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-05 00:23:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>38</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results:</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>threeday#2020-07-02 00:00:00,2020-07-04 23:59:59</td>
-<td>threeday</td>
-<td>2020-07-02 00:00:00</td>
-<td>2020-07-04 23:59:59</td>
-<td>4</td>
-<td>149.7954</td>
-<td>0.0710868450815781</td>
-<td>0.111113168762384</td>
-<td>0</td>
-<td>0</td>
-</tr>
-<tr>
-<td>threeday#2020-07-03 00:00:00,2020-07-05 23:59:59</td>
-<td>threeday</td>
-<td>2020-07-03 00:00:00</td>
-<td>2020-07-05 23:59:59</td>
-<td>3</td>
-<td>162.7952</td>
-<td>0.0492745931499224</td>
-<td>0.0502547286558745</td>
-<td>0</td>
-<td>0</td>
-</tr>
-<tr>
-<td>threeday#2020-07-04 00:00:00,2020-07-06 23:59:59</td>
-<td>threeday</td>
-<td>2020-07-04 00:00:00</td>
-<td>2020-07-06 23:59:59</td>
-<td>2</td>
-<td>110.0815</td>
-<td>0.0449915246814979</td>
-<td>0.0483879032392475</td>
-<td>0</td>
-<td>0</td>
-</tr>
-<tr>
-<td>threeday#2020-07-05 00:00:00,2020-07-07 23:59:59</td>
-<td>threeday</td>
-<td>2020-07-05 00:00:00</td>
-<td>2020-07-07 23:59:59</td>
-<td>1</td>
-<td>52.9991166666667</td>
-<td>0.0377364779979038</td>
-<td>0.0377364779979038</td>
-<td>0</td>
-<td>0</td>
-</tr>
-</tbody>
-</table>
-</details>
-<details><summary>6. A three-day segment that repeats on a fixed day</summary><p><a href="../../setup/configuration/#time-segments">Time segment tested:</a></p>
-<table>
-<thead>
-<tr>
-<th>label</th>
-<th>start_time</th>
-<th>length</th>
-<th>repeats_on</th>
-<th>repeats_value</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>weekends</td>
-<td>00:00:00</td>
-<td>2D 23H 59M 59S</td>
-<td>wday</td>
-<td>5</td>
-</tr>
-</tbody>
-</table>
-<p>Data tested:</p>
-<p>We test 10 segments, one at the beginning of the first day, one at the end of the last day</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>2020-07-03 00:06:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>53</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 00:09:47.875</td>
-<td>per_and</td>
-<td>3</td>
-<td>52</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 23:47:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>60</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-03 23:55:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>59</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 00:15:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>58</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 00:18:05.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>57</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 23:51:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>41</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-04 23:57:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>40</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-05 00:21:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>39</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-07-05 00:23:00.000</td>
-<td>per_and</td>
-<td>3</td>
-<td>38</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results:</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>weekends#2020-07-03 00:00:00,2020-07-05 23:59:59</td>
-<td>weekends</td>
-<td>2020-07-03 00:00:00</td>
-<td>2020-07-05 23:59:59</td>
-<td>3</td>
-<td>162.7952</td>
-<td>0.0492745931499224</td>
-<td>0.0502547286558745</td>
-<td>0</td>
-<td>0</td>
-</tr>
-</tbody>
-</table>
-</details>
-<details><summary>7. Event segements</summary><p><a href="../../setup/configuration/#time-segments">Time segments tested:</a></p>
-<table>
-<thead>
-<tr>
-<th>label</th>
-<th>event_timestamp</th>
-<th>length</th>
-<th>shift</th>
-<th>shift_direction</th>
-<th>device_id</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>survey1</td>
-<td>1587661220000</td>
-<td>10H</td>
-<td>10H</td>
-<td>-1</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-</tr>
-<tr>
-<td>survey2</td>
-<td>1587661220000</td>
-<td>10H</td>
-<td>5H</td>
-<td>-1</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-</tr>
-<tr>
-<td>survey3</td>
-<td>1587661220000</td>
-<td>10H</td>
-<td>0H</td>
-<td>1</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-</tr>
-</tbody>
-</table>
-<p>Data tested: </p>
-<p>We test 7 segments, one at the beginning of the first day, one at the end of the last day</p>
-<table>
-<thead>
-<tr>
-<th>timestamp</th>
-<th>device_id</th>
-<th>battery_status</th>
-<th>battery_level</th>
-<th>battery_scale</th>
-<th>battery_voltage</th>
-<th>battery_temperature</th>
-<th>battery_adaptor</th>
-<th>battery_health</th>
-<th>battery_technology</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>2020-04-23 03:15:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>90</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 03:21:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>89</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 07:50:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>80</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 08:05:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>79</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 08:12:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>78</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 22:50:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>50</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-<tr>
-<td>2020-04-23 22:53:00.000</td>
-<td>a748ee1a-1d0b-4ae9-9074-279a2b6ba524</td>
-<td>3</td>
-<td>49</td>
-<td>100</td>
-<td>4157</td>
-<td>23</td>
-<td>0</td>
-<td>2</td>
-<td>Li-ion</td>
-</tr>
-</tbody>
-</table>
-<p>Output results:</p>
-<table>
-<thead>
-<tr>
-<th>local_segment</th>
-<th>local_segment_label</th>
-<th>local_segment_start_datetime</th>
-<th>local_segment_end_datetime</th>
-<th>phone_battery_rapids_sumdurationcharge</th>
-<th>phone_battery_rapids_countdischarge</th>
-<th>phone_battery_rapids_sumdurationdischarge</th>
-<th>phone_battery_rapids_maxconsumptionrate</th>
-<th>phone_battery_rapids_avgconsumptionrate</th>
-<th>phone_battery_rapids_countcharge</th>
-</tr>
-</thead>
-<tbody>
-<tr>
-<td>survey1#2020-04-23 03:00:20,2020-04-23 13:00:20</td>
-<td>survey1</td>
-<td>2020-04-23 03:00:20</td>
-<td>2020-04-23 13:00:20</td>
-<td>0</td>
-<td>2</td>
-<td>87.9985333333333</td>
-<td>0.0384621794978634</td>
-<td>0.0331202101231602</td>
-<td>0</td>
-</tr>
-<tr>
-<td>survey2#2020-04-23 08:00:20,2020-04-23 18:00:20</td>
-<td>survey2</td>
-<td>2020-04-23 08:00:20</td>
-<td>2020-04-23 18:00:20</td>
-<td>0</td>
-<td>1</td>
-<td>41.6659833333333</td>
-<td>0.0480007872129103</td>
-<td>0.0480007872129103</td>
-<td>0</td>
-</tr>
-<tr>
-<td>survey3#2020-04-23 13:00:20,2020-04-23 23:00:20</td>
-<td>survey3</td>
-<td>2020-04-23 13:00:20</td>
-<td>2020-04-23 23:00:20</td>
-<td>0</td>
-<td>1</td>
-<td>10.3498</td>
-<td>0.0966202245454018</td>
-<td>0.0966202245454018</td>
-<td>0</td>
-</tr>
-</tbody>
-</table>
-</details>
                 
               
               
diff --git a/dev/search/search_index.json b/dev/search/search_index.json
index a7659055..9d5ef7c7 100644
--- a/dev/search/search_index.json
+++ b/dev/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Welcome to RAPIDS documentation \u00b6 Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to extract and create behavioral features (a.k.a. digital biomarkers), visualize mobile sensor data, and structure your analysis into reproducible workflows. RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support data streams logged by smartphones, Fitbit wearables, and, in collaboration with the DBDP , Empatica wearables (but you can add your own too). If you want to know more head over to Overview Tip Questions or feedback can be posted on the #rapids channel in AWARE Framework's slack . Bugs and feature requests should be posted on Github . Join our discussions on our algorithms and assumptions for feature processing . Are you upgrading from RAPIDS 0.4.x or older? Follow this guide Ready? Go to Overview . What are the benefits of using RAPIDS? \u00b6 Consistent analysis . Every participant sensor dataset is analyzed in the same way and isolated from each other. Efficient analysis . Every analysis step is executed only once. Whenever your data or configuration changes, only the affected files are updated. Parallel execution . Thanks to Snakemake, your analysis can be executed over multiple cores without changing your code. Code-free features . Extract any of the behavioral features offered by RAPIDS without writing any code. Extensible code . You can easily add your own data streams or behavioral features in R or Python, share them with the community, and keep authorship and citations. Timezone aware . Your data is adjusted to one or more time zones per participant. Flexible time segments . 
You can extract behavioral features on time windows of any length (e.g., 5 minutes, 3 hours, 2 days), on every day or particular days (e.g., weekends, Mondays, the 1 st of each month, etc.), or around events of interest (e.g., surveys or clinical relapses). Tested code . We are continually adding tests to make sure our behavioral features are correct. Reproducible code . If you structure your analysis within RAPIDS, you can be sure your code will run in other computers as intended, thanks to R and Python virtual environments. You can share your analysis code along with your publications without any overhead. Private . All your data is processed locally.","title":"Home"},{"location":"#welcome-to-rapids-documentation","text":"Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to extract and create behavioral features (a.k.a. digital biomarkers), visualize mobile sensor data, and structure your analysis into reproducible workflows. RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support data streams logged by smartphones, Fitbit wearables, and, in collaboration with the DBDP , Empatica wearables (but you can add your own too). If you want to know more head over to Overview Tip Questions or feedback can be posted on the #rapids channel in AWARE Framework's slack . Bugs and feature requests should be posted on Github . Join our discussions on our algorithms and assumptions for feature processing . Are you upgrading from RAPIDS 0.4.x or older? Follow this guide Ready? Go to Overview .","title":"Welcome to RAPIDS documentation"},{"location":"#what-are-the-benefits-of-using-rapids","text":"Consistent analysis . Every participant sensor dataset is analyzed in the same way and isolated from each other. Efficient analysis . Every analysis step is executed only once. Whenever your data or configuration changes, only the affected files are updated. Parallel execution . 
Thanks to Snakemake, your analysis can be executed over multiple cores without changing your code. Code-free features . Extract any of the behavioral features offered by RAPIDS without writing any code. Extensible code . You can easily add your own data streams or behavioral features in R or Python, share them with the community, and keep authorship and citations. Timezone aware . Your data is adjusted to one or more time zones per participant. Flexible time segments . You can extract behavioral features on time windows of any length (e.g., 5 minutes, 3 hours, 2 days), on every day or particular days (e.g., weekends, Mondays, the 1 st of each month, etc.), or around events of interest (e.g., surveys or clinical relapses). Tested code . We are continually adding tests to make sure our behavioral features are correct. Reproducible code . If you structure your analysis within RAPIDS, you can be sure your code will run in other computers as intended, thanks to R and Python virtual environments. You can share your analysis code along with your publications without any overhead. Private . All your data is processed locally.","title":"What are the benefits of using RAPIDS?"},{"location":"change-log/","text":"Change Log \u00b6 v1.0.1 \u00b6 Fix crash in chunk_episodes of utils.py for multi time zone data Fix crash in BT Doryab provider when the number of clusters is 2 Fix Fitbit multi time zone inference from phone data (simplify) Fix missing columns when the input for phone data yield is empty Fix wrong date time labels for event segments for multi time zone data (all labels are computed based on a single tz) Fix periodic segment crash when there are no segments to assign (only affects wday, mday, qday, or yday) Fix crash in Analysis Workflow with new suffix in segments\u2019 labels v1.0.0 \u00b6 Add a new Overview page. You can extend RAPIDS with your own data streams . 
Data streams are data collected with other sensing apps besides AWARE (like Beiwe, mindLAMP), and stored in other data containers (databases, files) besides MySQL. Support to analyze Empatica wearable data (thanks to Joe Kim and Brinnae Bent from the DBDP ) Support to analyze AWARE data stored in CSV files and InfluxDB databases Support to analyze data collected over multiple time zones Support for sleep intraday features from the core team and also from the community (thanks to Stephen Price) Users can comment on the documentation (powered by utterances). SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT . Add RAPIDS new logo Move Citation and Minimal Example page to the Setup section Add config.yaml validation schema and documentation. Now it\u2019s more difficult to modify the config.yaml file with invalid values. Add new time at home Doryab location feature Add and home coordinates to the location data file so location providers can build features based on it. If you are migrating from RAPIDS 0.4.3 or older, check this guide v0.4.3 \u00b6 Fix bug when any of the rows from any sensor do not belong a time segment v0.4.2 \u00b6 Update battery testing Fix location processing bug when certain columns don\u2019t exist Fix HR intraday bug when minutesonZONE features were 0 Update FAQs Fix HR summary bug when restinghr=0 (ignore those rows) Fix ROG, location entropy and normalized entropy in Doryab location provider Remove sampling frequency dependance in Doryab location provider Update documentation of Doryab location provider Add new FITBIT_DATA_YIELD RAPIDS provider Deprecate Doryab circadian movement feature until it is fixed v0.4.1 \u00b6 Fix bug when no error message was displayed for an empty [PHONE_DATA_YIELD][SENSORS] when resampling location data v0.4.0 \u00b6 Add four new phone sensors that can be used for PHONE_DATA_YIELD Add code so new feature providers can be added for the new four sensors Add new clustering algorithm (OPTICS) for Doryab features 
Update default EPS parameter for Doryab location clustering Add clearer error message for invalid phone data yield sensors Add ALL_RESAMPLED flag and accuracy limit for location features Add FAQ about null characters in phone tables Reactivate light and wifi tests and update testing docs Fix bug when parsing Fitbit steps data Fix bugs when merging features from empty time segments Fix minor issues in the documentation v0.3.2 \u00b6 Update docker and linux instructions to use RSPM binary repo for for faster installation Update CI to create a release on a tagged push that passes the tests Clarify in DB credential configuration that we only support MySQL Add Windows installation instructions Fix bugs in the create_participants_file script Fix bugs in Fitbit data parsing. Fixed Doryab location features context of clustering. Fixed the wrong shifting while calculating distance in Doryab location features. Refactored the haversine function v0.3.1 \u00b6 Update installation docs for RAPIDS\u2019 docker container Fix example analysis use of accelerometer data in a plot Update FAQ Update minimal example documentation Minor doc updates v0.3.0 \u00b6 Update R and Python virtual environments Add GH actions CI support for tests and docker Add release and test badges to README v0.2.6 \u00b6 Fix old versions banner on nested pages v0.2.5 \u00b6 Fix docs deploy typo v0.2.4 \u00b6 Fix broken links in landing page and docs deploy v0.2.3 \u00b6 Fix participant IDS in the example analysis workflow v0.2.2 \u00b6 Fix readme link to docs v0.2.1 \u00b6 FIx link to the most recent version in the old version banner v0.2.0 \u00b6 Add new PHONE_BLUETOOTH DORYAB provider Deprecate PHONE_BLUETOOTH RAPIDS provider Fix bug in filter_data_by_segment for Python when dataset was empty Minor doc updates New FAQ item v0.1.0 \u00b6 New and more consistent docs (this website). 
The previous docs are marked as beta Consolidate configuration instructions Flexible time segments Simplify Fitbit behavioral feature extraction and documentation Sensor\u2019s configuration and output is more consistent Update visualizations to handle flexible day segments Create a RAPIDS execution script that allows re-computation of the pipeline after configuration changes Add citation guide Update virtual environment guide Update analysis workflow example Add a Code of Conduct Update Team page","title":"Change Log"},{"location":"change-log/#change-log","text":"","title":"Change Log"},{"location":"change-log/#v101","text":"Fix crash in chunk_episodes of utils.py for multi time zone data Fix crash in BT Doryab provider when the number of clusters is 2 Fix Fitbit multi time zone inference from phone data (simplify) Fix missing columns when the input for phone data yield is empty Fix wrong date time labels for event segments for multi time zone data (all labels are computed based on a single tz) Fix periodic segment crash when there are no segments to assign (only affects wday, mday, qday, or yday) Fix crash in Analysis Workflow with new suffix in segments\u2019 labels","title":"v1.0.1"},{"location":"change-log/#v100","text":"Add a new Overview page. You can extend RAPIDS with your own data streams . Data streams are data collected with other sensing apps besides AWARE (like Beiwe, mindLAMP), and stored in other data containers (databases, files) besides MySQL. Support to analyze Empatica wearable data (thanks to Joe Kim and Brinnae Bent from the DBDP ) Support to analyze AWARE data stored in CSV files and InfluxDB databases Support to analyze data collected over multiple time zones Support for sleep intraday features from the core team and also from the community (thanks to Stephen Price) Users can comment on the documentation (powered by utterances). SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT . 
Add RAPIDS new logo Move Citation and Minimal Example page to the Setup section Add config.yaml validation schema and documentation. Now it\u2019s more difficult to modify the config.yaml file with invalid values. Add new time at home Doryab location feature Add and home coordinates to the location data file so location providers can build features based on it. If you are migrating from RAPIDS 0.4.3 or older, check this guide","title":"v1.0.0"},{"location":"change-log/#v043","text":"Fix bug when any of the rows from any sensor do not belong a time segment","title":"v0.4.3"},{"location":"change-log/#v042","text":"Update battery testing Fix location processing bug when certain columns don\u2019t exist Fix HR intraday bug when minutesonZONE features were 0 Update FAQs Fix HR summary bug when restinghr=0 (ignore those rows) Fix ROG, location entropy and normalized entropy in Doryab location provider Remove sampling frequency dependance in Doryab location provider Update documentation of Doryab location provider Add new FITBIT_DATA_YIELD RAPIDS provider Deprecate Doryab circadian movement feature until it is fixed","title":"v0.4.2"},{"location":"change-log/#v041","text":"Fix bug when no error message was displayed for an empty [PHONE_DATA_YIELD][SENSORS] when resampling location data","title":"v0.4.1"},{"location":"change-log/#v040","text":"Add four new phone sensors that can be used for PHONE_DATA_YIELD Add code so new feature providers can be added for the new four sensors Add new clustering algorithm (OPTICS) for Doryab features Update default EPS parameter for Doryab location clustering Add clearer error message for invalid phone data yield sensors Add ALL_RESAMPLED flag and accuracy limit for location features Add FAQ about null characters in phone tables Reactivate light and wifi tests and update testing docs Fix bug when parsing Fitbit steps data Fix bugs when merging features from empty time segments Fix minor issues in the 
documentation","title":"v0.4.0"},{"location":"change-log/#v032","text":"Update docker and linux instructions to use RSPM binary repo for for faster installation Update CI to create a release on a tagged push that passes the tests Clarify in DB credential configuration that we only support MySQL Add Windows installation instructions Fix bugs in the create_participants_file script Fix bugs in Fitbit data parsing. Fixed Doryab location features context of clustering. Fixed the wrong shifting while calculating distance in Doryab location features. Refactored the haversine function","title":"v0.3.2"},{"location":"change-log/#v031","text":"Update installation docs for RAPIDS\u2019 docker container Fix example analysis use of accelerometer data in a plot Update FAQ Update minimal example documentation Minor doc updates","title":"v0.3.1"},{"location":"change-log/#v030","text":"Update R and Python virtual environments Add GH actions CI support for tests and docker Add release and test badges to README","title":"v0.3.0"},{"location":"change-log/#v026","text":"Fix old versions banner on nested pages","title":"v0.2.6"},{"location":"change-log/#v025","text":"Fix docs deploy typo","title":"v0.2.5"},{"location":"change-log/#v024","text":"Fix broken links in landing page and docs deploy","title":"v0.2.4"},{"location":"change-log/#v023","text":"Fix participant IDS in the example analysis workflow","title":"v0.2.3"},{"location":"change-log/#v022","text":"Fix readme link to docs","title":"v0.2.2"},{"location":"change-log/#v021","text":"FIx link to the most recent version in the old version banner","title":"v0.2.1"},{"location":"change-log/#v020","text":"Add new PHONE_BLUETOOTH DORYAB provider Deprecate PHONE_BLUETOOTH RAPIDS provider Fix bug in filter_data_by_segment for Python when dataset was empty Minor doc updates New FAQ item","title":"v0.2.0"},{"location":"change-log/#v010","text":"New and more consistent docs (this website). 
The previous docs are marked as beta Consolidate configuration instructions Flexible time segments Simplify Fitbit behavioral feature extraction and documentation Sensor\u2019s configuration and output is more consistent Update visualizations to handle flexible day segments Create a RAPIDS execution script that allows re-computation of the pipeline after configuration changes Add citation guide Update virtual environment guide Update analysis workflow example Add a Code of Conduct Update Team page","title":"v0.1.0"},{"location":"citation/","text":"Cite RAPIDS and providers \u00b6 RAPIDS and the community RAPIDS is a community effort and as such we want to continue recognizing the contributions from other researchers. Besides citing RAPIDS, we ask you to cite any of the authors listed below if you used those sensor providers in your analysis, thank you! RAPIDS \u00b6 If you used RAPIDS, please cite this paper . RAPIDS et al. citation Vega J, Li M, Aguillera K, Goel N, Joshi E, Durica KC, Kunta AR, Low CA RAPIDS: Reproducible Analysis Pipeline for Data Streams Collected with Mobile Devices JMIR Preprints. 18/08/2020:23246 DOI: 10.2196/preprints.23246 URL: https://preprints.jmir.org/preprint/23246 DBDP (all Empatica sensors) \u00b6 If you computed features using the provider [DBDP] of any of the Empatica sensors (accelerometer, heart rate, temperature, EDA, BVP, IBI, tags) cite this paper in addition to RAPIDS. Bent et al. citation Bent, B., Wang, K., Grzesiak, E., Jiang, C., Qi, Y., Jiang, Y., Cho, P., Zingler, K., Ogbeide, F.I., Zhao, A., Runge, R., Sim, I., Dunn, J. (2020). The Digital Biomarker Discovery Pipeline: An open source software platform for the development of digital biomarkers using mHealth and wearables data. Journal of Clinical and Translational Science, 1-28. doi:10.1017/cts.2020.511 Panda (accelerometer) \u00b6 If you computed accelerometer features using the provider [PHONE_ACCLEROMETER][PANDA] cite this paper in addition to RAPIDS. Panda et al. 
citation Panda N, Solsky I, Huang EJ, Lipsitz S, Pradarelli JC, Delisle M, Cusack JC, Gadd MA, Lubitz CC, Mullen JT, Qadan M, Smith BL, Specht M, Stephen AE, Tanabe KK, Gawande AA, Onnela JP, Haynes AB. Using Smartphones to Capture Novel Recovery Metrics After Cancer Surgery. JAMA Surg. 2020 Feb 1;155(2):123-129. doi: 10.1001/jamasurg.2019.4702. PMID: 31657854; PMCID: PMC6820047. Stachl (applications foreground) \u00b6 If you computed applications foreground features using the app category (genre) catalogue in [PHONE_APPLICATIONS_FOREGROUND][RAPIDS] cite this paper in addition to RAPIDS. Stachl et al. citation Clemens Stachl, Quay Au, Ramona Schoedel, Samuel D. Gosling, Gabriella M. Harari, Daniel Buschek, Sarah Theres V\u00f6lkel, Tobias Schuwerk, Michelle Oldemeier, Theresa Ullmann, Heinrich Hussmann, Bernd Bischl, Markus B\u00fchner. Proceedings of the National Academy of Sciences Jul 2020, 117 (30) 17680-17687; DOI: 10.1073/pnas.1920484117 Doryab (bluetooth) \u00b6 If you computed bluetooth features using the provider [PHONE_BLUETOOTH][DORYAB] cite this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Barnett (locations) \u00b6 If you computed locations features using the provider [PHONE_LOCATIONS][BARNETT] cite this paper and this paper in addition to RAPIDS. Barnett et al. citation Ian Barnett, Jukka-Pekka Onnela, Inferring mobility measures from GPS traces with missing data, Biostatistics, Volume 21, Issue 2, April 2020, Pages e98\u2013e112, https://doi.org/10.1093/biostatistics/kxy059 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. 
In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845 Doryab (locations) \u00b6 If you computed locations features using the provider [PHONE_LOCATIONS][DORYAB] cite this paper and this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Citation"},{"location":"citation/#cite-rapids-and-providers","text":"RAPIDS and the community RAPIDS is a community effort and as such we want to continue recognizing the contributions from other researchers. Besides citing RAPIDS, we ask you to cite any of the authors listed below if you used those sensor providers in your analysis, thank you!","title":"Cite RAPIDS and providers"},{"location":"citation/#rapids","text":"If you used RAPIDS, please cite this paper . RAPIDS et al. citation Vega J, Li M, Aguillera K, Goel N, Joshi E, Durica KC, Kunta AR, Low CA RAPIDS: Reproducible Analysis Pipeline for Data Streams Collected with Mobile Devices JMIR Preprints. 
18/08/2020:23246 DOI: 10.2196/preprints.23246 URL: https://preprints.jmir.org/preprint/23246","title":"RAPIDS"},{"location":"citation/#dbdp-all-empatica-sensors","text":"If you computed features using the provider [DBDP] of any of the Empatica sensors (accelerometer, heart rate, temperature, EDA, BVP, IBI, tags) cite this paper in addition to RAPIDS. Bent et al. citation Bent, B., Wang, K., Grzesiak, E., Jiang, C., Qi, Y., Jiang, Y., Cho, P., Zingler, K., Ogbeide, F.I., Zhao, A., Runge, R., Sim, I., Dunn, J. (2020). The Digital Biomarker Discovery Pipeline: An open source software platform for the development of digital biomarkers using mHealth and wearables data. Journal of Clinical and Translational Science, 1-28. doi:10.1017/cts.2020.511","title":"DBDP (all Empatica sensors)"},{"location":"citation/#panda-accelerometer","text":"If you computed accelerometer features using the provider [PHONE_ACCLEROMETER][PANDA] cite this paper in addition to RAPIDS. Panda et al. citation Panda N, Solsky I, Huang EJ, Lipsitz S, Pradarelli JC, Delisle M, Cusack JC, Gadd MA, Lubitz CC, Mullen JT, Qadan M, Smith BL, Specht M, Stephen AE, Tanabe KK, Gawande AA, Onnela JP, Haynes AB. Using Smartphones to Capture Novel Recovery Metrics After Cancer Surgery. JAMA Surg. 2020 Feb 1;155(2):123-129. doi: 10.1001/jamasurg.2019.4702. PMID: 31657854; PMCID: PMC6820047.","title":"Panda (accelerometer)"},{"location":"citation/#stachl-applications-foreground","text":"If you computed applications foreground features using the app category (genre) catalogue in [PHONE_APPLICATIONS_FOREGROUND][RAPIDS] cite this paper in addition to RAPIDS. Stachl et al. citation Clemens Stachl, Quay Au, Ramona Schoedel, Samuel D. Gosling, Gabriella M. Harari, Daniel Buschek, Sarah Theres V\u00f6lkel, Tobias Schuwerk, Michelle Oldemeier, Theresa Ullmann, Heinrich Hussmann, Bernd Bischl, Markus B\u00fchner. 
Proceedings of the National Academy of Sciences Jul 2020, 117 (30) 17680-17687; DOI: 10.1073/pnas.1920484117","title":"Stachl (applications foreground)"},{"location":"citation/#doryab-bluetooth","text":"If you computed bluetooth features using the provider [PHONE_BLUETOOTH][DORYAB] cite this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394","title":"Doryab (bluetooth)"},{"location":"citation/#barnett-locations","text":"If you computed locations features using the provider [PHONE_LOCATIONS][BARNETT] cite this paper and this paper in addition to RAPIDS. Barnett et al. citation Ian Barnett, Jukka-Pekka Onnela, Inferring mobility measures from GPS traces with missing data, Biostatistics, Volume 21, Issue 2, April 2020, Pages e98\u2013e112, https://doi.org/10.1093/biostatistics/kxy059 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Barnett (locations)"},{"location":"citation/#doryab-locations","text":"If you computed locations features using the provider [PHONE_LOCATIONS][DORYAB] cite this paper and this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. 
Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Doryab (locations)"},{"location":"code_of_conduct/","text":"Contributor Covenant Code of Conduct \u00b6 Our Pledge \u00b6 We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
Our Standards \u00b6 Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Enforcement Responsibilities \u00b6 Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. Scope \u00b6 This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Enforcement \u00b6 Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at moshi@pitt.edu . All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. Enforcement Guidelines \u00b6 Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 1. Correction \u00b6 Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 2. Warning \u00b6 Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 3. Temporary Ban \u00b6 Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 4. 
Permanent Ban \u00b6 Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community. Attribution \u00b6 This Code of Conduct is adapted from the Contributor Covenant , version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Code of Conduct"},{"location":"code_of_conduct/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"code_of_conduct/#our-pledge","text":"We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.","title":"Our Pledge"},{"location":"code_of_conduct/#our-standards","text":"Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"code_of_conduct/#enforcement-responsibilities","text":"Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.","title":"Enforcement Responsibilities"},{"location":"code_of_conduct/#scope","text":"This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. 
Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.","title":"Scope"},{"location":"code_of_conduct/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at moshi@pitt.edu . All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident.","title":"Enforcement"},{"location":"code_of_conduct/#enforcement-guidelines","text":"Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:","title":"Enforcement Guidelines"},{"location":"code_of_conduct/#1-correction","text":"Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.","title":"1. Correction"},{"location":"code_of_conduct/#2-warning","text":"Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.","title":"2. Warning"},{"location":"code_of_conduct/#3-temporary-ban","text":"Community Impact : A serious violation of community standards, including sustained inappropriate behavior. 
Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.","title":"3. Temporary Ban"},{"location":"code_of_conduct/#4-permanent-ban","text":"Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community.","title":"4. Permanent Ban"},{"location":"code_of_conduct/#attribution","text":"This Code of Conduct is adapted from the Contributor Covenant , version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Attribution"},{"location":"common-errors/","text":"Common Errors \u00b6 Cannot connect to your MySQL server \u00b6 Problem **Error in .local ( drv, \\. .. ) :** **Failed to connect to database: Error: Can \\' t initialize character set unknown ( path: compiled \\_ in ) ** : Calls: dbConnect -> dbConnect -> .local -> .Call Execution halted [ Tue Mar 10 19 :40:15 2020 ] Error in rule download_dataset: jobid: 531 output: data/raw/p60/locations_raw.csv RuleException: CalledProcessError in line 20 of /home/ubuntu/rapids/rules/preprocessing.snakefile: Command 'set -euo pipefail; Rscript --vanilla /home/ubuntu/rapids/.snakemake/scripts/tmp_2jnvqs7.download_dataset.R' returned non-zero exit status 1 . 
File \"/home/ubuntu/rapids/rules/preprocessing.snakefile\" , line 20 , in __rule_download_dataset File \"/home/ubuntu/anaconda3/envs/moshi-env/lib/python3.7/concurrent/futures/thread.py\" , line 57 , in run Shutting down, this might take some time. Exiting because a job execution failed. Look above for error message Solution Please make sure the DATABASE_GROUP in config.yaml matches your DB credentials group in .env . Cannot start mysql in linux via brew services start mysql \u00b6 Problem Cannot start mysql in linux via brew services start mysql Solution Use mysql.server start Every time I run force the download_dataset rule all rules are executed \u00b6 Problem When running snakemake -j1 -R pull_phone_data or ./rapids -j1 -R pull_phone_data all the rules and files are re-computed Solution This is expected behavior. The advantage of using snakemake under the hood is that every time a file containing data is modified every rule that depends on that file will be re-executed to update their results. In this case, since download_dataset updates all the raw data, and you are forcing the rule with the flag -R every single rule that depends on those raw files will be executed. Error Table XXX doesn't exist while running the download_phone_data or download_fitbit_data rule. \u00b6 Problem Error in .local ( conn, statement, ... ) : could not run statement: Table 'db_name.table_name' doesn ' t exist Calls: colnames ... .local -> dbSendQuery -> dbSendQuery -> .local -> .Call Execution halted Solution Please make sure the sensors listed in [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] and the [CONTAINER] of each sensor you activated in config.yaml match your database tables or files. 
How do I install RAPIDS on Ubuntu 16.04 \u00b6 Solution Install dependencies (Homebrew - if not installed): sudo apt-get install libmariadb-client-lgpl-dev libxml2-dev libssl-dev Install brew for linux and add the following line to ~/.bashrc : export PATH=$HOME/.linuxbrew/bin:$PATH source ~/.bashrc Install MySQL brew install mysql brew services start mysql Install R, pandoc and rmarkdown: brew install r brew install gcc@6 (needed due to this bug ) HOMEBREW_CC=gcc-6 brew install pandoc Install miniconda using these instructions Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install R packages and virtual environment: snakemake renv_install snakemake renv_init snakemake renv_restore This step could take several minutes to complete. Please be patient and let it run until completion. mysql.h cannot be found \u00b6 Problem -------------------------- [ ERROR MESSAGE ] ---------------------------- <stdin>:1:10: fatal error: mysql.h: No such file or directory compilation terminated. ----------------------------------------------------------------------- ERROR: configuration failed for package 'RMySQL' Solution sudo apt install libmariadbclient-dev No package libcurl found \u00b6 Problem libcurl cannot be found Solution Install libcurl sudo apt install libcurl4-openssl-dev Configuration failed because openssl was not found. \u00b6 Problem openssl cannot be found Solution Install openssl sudo apt install libssl-dev Configuration failed because libxml-2.0 was not found \u00b6 Problem libxml-2.0 cannot be found Solution Install libxml-2.0 sudo apt install libxml2-dev SSL connection error when running RAPIDS \u00b6 Problem You are getting the following error message when running RAPIDS: Error: Failed to connect: SSL connection error: error:1425F102:SSL routines:ssl_choose_client_version:unsupported protocol. 
Solution This is a bug in Ubuntu 20.04 when trying to connect to an old MySQL server with MySQL client 8.0. You should get the same error message if you try to connect from the command line. There you can add the option --ssl-mode=DISABLED but we can't do this from the R connector. If you can't update your server, the quickest solution would be to import your database to another server or to a local environment. Alternatively, you could replace mysql-client and libmysqlclient-dev with mariadb-client and libmariadbclient-dev and reinstall renv. More info about this issue here DB_TABLES key not found \u00b6 Problem If you get the following error KeyError in line 43 of preprocessing.smk: 'PHONE_SENSORS' , it means that the indentation of the key [PHONE_SENSORS] is not matching the other child elements of PHONE_VALID_SENSED_BINS Solution You need to add or remove any leading whitespaces as needed on that line. PHONE_VALID_SENSED_BINS : COMPUTE : False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features BIN_SIZE : &bin_size 5 # (in minutes) PHONE_SENSORS : [] Error while updating your conda environment in Ubuntu \u00b6 Problem You get the following error: CondaMultiError: CondaVerificationError: The package for tk located at /home/ubuntu/miniconda2/pkgs/tk-8.6.9-hed695b0_1003 appears to be corrupted. The path 'include/mysqlStubs.h' specified in the package manifest cannot be found. ClobberError: This transaction has incompatible packages due to a shared path. 
packages: conda-forge/linux-64::llvm-openmp-10.0.0-hc9558a2_0, anaconda/linux-64::intel-openmp-2019.4-243 path: 'lib/libiomp5.so' Solution Reinstall conda Embedded nul in string \u00b6 Problem You get the following error when downloading sensor data: Error in result_fetch ( res@ptr, n = n ) : embedded nul in string: Solution This problem is due to the way RMariaDB handles a mismatch between data types in R and MySQL (see this issue ). Since it seems this problem won\u2019t be handled by RMariaDB , you have two options: Remove the null character from the conflictive table cell(s). You can adapt the following query on a MySQL server 8.0 or newer update YOUR_TABLE set YOUR_COLUMN = regexp_replace ( YOUR_COLUMN , '\\0' , '' ); If it\u2019s not feasible to modify your data you can try swapping RMariaDB with RMySQL . Just keep in mind you might have problems connecting to modern MySQL servers running in Linux: Add RMySQL to the renv environment by running the following command in a terminal open on RAPIDS root folder R -e 'renv::install(\"RMySQL\")' Go to src/data/streams/pull_phone_data.R or src/data/streams/pull_fitbit_data.R and replace library(RMariaDB) with library(RMySQL) In the same file(s) replace dbEngine <- dbConnect(MariaDB(), default.file = \"./.env\", group = group) with dbEngine <- dbConnect(MySQL(), default.file = \"./.env\", group = group) There is no package called RMariaDB \u00b6 Problem You get the following error when executing RAPIDS: Error in library ( RMariaDB ) : there is no package called 'RMariaDB' Execution halted Solution In RAPIDS v0.1.0 we replaced RMySQL R package with RMariaDB , this error means your R virtual environment is out of date, to update it run snakemake -j1 renv_restore Unrecognized output timezone \u201cAmerica/New_York\u201d \u00b6 Problem When running RAPIDS with R 4.0.3 on MacOS on M1, lubridate may throw an error associated with the timezone. 
Error in C_force_tz ( time, tz = tzone, roll ) : CCTZ: Unrecognized output timezone: \"America/New_York\" Calls: get_timestamp_filter ... .parse_date_time -> .strptime -> force_tz -> C_force_tz Solution This is because R timezone library is not set. Please add Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) to the file activate.R in the renv folder to set the timezone library. For further details on how to test if TZDIR is properly set, please refer to https://github.com/tidyverse/lubridate/issues/928#issuecomment-720059233 . Unimplemented MAX_NO_FIELD_TYPES \u00b6 Problem You get the following error when downloading Fitbit data: Error: Unimplemented MAX_NO_FIELD_TYPES Execution halted Solution At the moment RMariaDB cannot handle MySQL columns of JSON type. Change the type of your Fitbit data column to longtext (note that the content will not change and will still be a JSON object just interpreted as a string). Running RAPIDS on Apple Silicon M1 Mac \u00b6 Problem You get the following error when installing pandoc or running rapids: MoSHI/rapids/renv/staging/1/00LOCK-KernSmooth/00new/KernSmooth/libs/KernSmooth.so: mach-0, but wrong architecture Solution As of Feb 2020 in M1 macs, R needs to be installed via brew under Rosetta (x86 arch) due to some incompatibility with selected R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. x86 homebrew should be installed in /usr/local/bin/brew , you can check which brew you are using by typing which brew . Then use x86 homebrew to install R and restore RAPIDS packages ( renv_restore ).","title":"Common Errors"},{"location":"common-errors/#common-errors","text":"","title":"Common Errors"},{"location":"common-errors/#cannot-connect-to-your-mysql-server","text":"Problem **Error in .local ( drv, \\. .. 
) :** **Failed to connect to database: Error: Can \\' t initialize character set unknown ( path: compiled \\_ in ) ** : Calls: dbConnect -> dbConnect -> .local -> .Call Execution halted [ Tue Mar 10 19 :40:15 2020 ] Error in rule download_dataset: jobid: 531 output: data/raw/p60/locations_raw.csv RuleException: CalledProcessError in line 20 of /home/ubuntu/rapids/rules/preprocessing.snakefile: Command 'set -euo pipefail; Rscript --vanilla /home/ubuntu/rapids/.snakemake/scripts/tmp_2jnvqs7.download_dataset.R' returned non-zero exit status 1 . File \"/home/ubuntu/rapids/rules/preprocessing.snakefile\" , line 20 , in __rule_download_dataset File \"/home/ubuntu/anaconda3/envs/moshi-env/lib/python3.7/concurrent/futures/thread.py\" , line 57 , in run Shutting down, this might take some time. Exiting because a job execution failed. Look above for error message Solution Please make sure the DATABASE_GROUP in config.yaml matches your DB credentials group in .env .","title":"Cannot connect to your MySQL server"},{"location":"common-errors/#cannot-start-mysql-in-linux-via-brew-services-start-mysql","text":"Problem Cannot start mysql in linux via brew services start mysql Solution Use mysql.server start","title":"Cannot start mysql in linux via brew services start mysql"},{"location":"common-errors/#every-time-i-run-force-the-download_dataset-rule-all-rules-are-executed","text":"Problem When running snakemake -j1 -R pull_phone_data or ./rapids -j1 -R pull_phone_data all the rules and files are re-computed Solution This is expected behavior. The advantage of using snakemake under the hood is that every time a file containing data is modified every rule that depends on that file will be re-executed to update their results. 
In this case, since download_dataset updates all the raw data, and you are forcing the rule with the flag -R every single rule that depends on those raw files will be executed.","title":"Every time I run force the download_dataset rule all rules are executed"},{"location":"common-errors/#error-table-xxx-doesnt-exist-while-running-the-download_phone_data-or-download_fitbit_data-rule","text":"Problem Error in .local ( conn, statement, ... ) : could not run statement: Table 'db_name.table_name' doesn ' t exist Calls: colnames ... .local -> dbSendQuery -> dbSendQuery -> .local -> .Call Execution halted Solution Please make sure the sensors listed in [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] and the [CONTAINER] of each sensor you activated in config.yaml match your database tables or files.","title":"Error Table XXX doesn't exist while running the download_phone_data or download_fitbit_data rule."},{"location":"common-errors/#how-do-i-install-rapids-on-ubuntu-1604","text":"Solution Install dependencies (Homebrew - if not installed): sudo apt-get install libmariadb-client-lgpl-dev libxml2-dev libssl-dev Install brew for linux and add the following line to ~/.bashrc : export PATH=$HOME/.linuxbrew/bin:$PATH source ~/.bashrc Install MySQL brew install mysql brew services start mysql Install R, pandoc and rmarkdown: brew install r brew install gcc@6 (needed due to this bug ) HOMEBREW_CC=gcc-6 brew install pandoc Install miniconda using these instructions Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install R packages and virtual environment: snakemake renv_install snakemake renv_init snakemake renv_restore This step could take several minutes to complete. 
Please be patient and let it run until completion.","title":"How do I install RAPIDS on Ubuntu 16.04"},{"location":"common-errors/#mysqlh-cannot-be-found","text":"Problem -------------------------- [ ERROR MESSAGE ] ---------------------------- <stdin>:1:10: fatal error: mysql.h: No such file or directory compilation terminated. ----------------------------------------------------------------------- ERROR: configuration failed for package 'RMySQL' Solution sudo apt install libmariadbclient-dev","title":"mysql.h cannot be found"},{"location":"common-errors/#no-package-libcurl-found","text":"Problem libcurl cannot be found Solution Install libcurl sudo apt install libcurl4-openssl-dev","title":"No package libcurl found"},{"location":"common-errors/#configuration-failed-because-openssl-was-not-found","text":"Problem openssl cannot be found Solution Install openssl sudo apt install libssl-dev","title":"Configuration failed because openssl was not found."},{"location":"common-errors/#configuration-failed-because-libxml-20-was-not-found","text":"Problem libxml-2.0 cannot be found Solution Install libxml-2.0 sudo apt install libxml2-dev","title":"Configuration failed because libxml-2.0 was not found"},{"location":"common-errors/#ssl-connection-error-when-running-rapids","text":"Problem You are getting the following error message when running RAPIDS: Error: Failed to connect: SSL connection error: error:1425F102:SSL routines:ssl_choose_client_version:unsupported protocol. Solution This is a bug in Ubuntu 20.04 when trying to connect to an old MySQL server with MySQL client 8.0. You should get the same error message if you try to connect from the command line. There you can add the option --ssl-mode=DISABLED but we can't do this from the R connector. If you can't update your server, the quickest solution would be to import your database to another server or to a local environment. 
Alternatively, you could replace mysql-client and libmysqlclient-dev with mariadb-client and libmariadbclient-dev and reinstall renv. More info about this issue here","title":"SSL connection error when running RAPIDS"},{"location":"common-errors/#db_tables-key-not-found","text":"Problem If you get the following error KeyError in line 43 of preprocessing.smk: 'PHONE_SENSORS' , it means that the indentation of the key [PHONE_SENSORS] is not matching the other child elements of PHONE_VALID_SENSED_BINS Solution You need to add or remove any leading whitespaces as needed on that line. PHONE_VALID_SENSED_BINS : COMPUTE : False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features BIN_SIZE : &bin_size 5 # (in minutes) PHONE_SENSORS : []","title":"DB_TABLES key not found"},{"location":"common-errors/#error-while-updating-your-conda-environment-in-ubuntu","text":"Problem You get the following error: CondaMultiError: CondaVerificationError: The package for tk located at /home/ubuntu/miniconda2/pkgs/tk-8.6.9-hed695b0_1003 appears to be corrupted. The path 'include/mysqlStubs.h' specified in the package manifest cannot be found. ClobberError: This transaction has incompatible packages due to a shared path. packages: conda-forge/linux-64::llvm-openmp-10.0.0-hc9558a2_0, anaconda/linux-64::intel-openmp-2019.4-243 path: 'lib/libiomp5.so' Solution Reinstall conda","title":"Error while updating your conda environment in Ubuntu"},{"location":"common-errors/#embedded-nul-in-string","text":"Problem You get the following error when downloading sensor data: Error in result_fetch ( res@ptr, n = n ) : embedded nul in string: Solution This problem is due to the way RMariaDB handles a mismatch between data types in R and MySQL (see this issue ). Since it seems this problem won\u2019t be handled by RMariaDB , you have two options: Remove the null character from the conflictive table cell(s). 
You can adapt the following query on a MySQL server 8.0 or newer update YOUR_TABLE set YOUR_COLUMN = regexp_replace ( YOUR_COLUMN , '\\0' , '' ); If it\u2019s not feasible to modify your data you can try swapping RMariaDB with RMySQL . Just keep in mind you might have problems connecting to modern MySQL servers running in Linux: Add RMySQL to the renv environment by running the following command in a terminal open on RAPIDS root folder R -e 'renv::install(\"RMySQL\")' Go to src/data/streams/pull_phone_data.R or src/data/streams/pull_fitbit_data.R and replace library(RMariaDB) with library(RMySQL) In the same file(s) replace dbEngine <- dbConnect(MariaDB(), default.file = \"./.env\", group = group) with dbEngine <- dbConnect(MySQL(), default.file = \"./.env\", group = group)","title":"Embedded nul in string"},{"location":"common-errors/#there-is-no-package-called-rmariadb","text":"Problem You get the following error when executing RAPIDS: Error in library ( RMariaDB ) : there is no package called 'RMariaDB' Execution halted Solution In RAPIDS v0.1.0 we replaced RMySQL R package with RMariaDB , this error means your R virtual environment is out of date, to update it run snakemake -j1 renv_restore","title":"There is no package called RMariaDB"},{"location":"common-errors/#unrecognized-output-timezone-americanew_york","text":"Problem When running RAPIDS with R 4.0.3 on MacOS on M1, lubridate may throw an error associated with the timezone. Error in C_force_tz ( time, tz = tzone, roll ) : CCTZ: Unrecognized output timezone: \"America/New_York\" Calls: get_timestamp_filter ... .parse_date_time -> .strptime -> force_tz -> C_force_tz Solution This is because R timezone library is not set. Please add Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) to the file activate.R in the renv folder to set the timezone library. 
For further details on how to test if TZDIR is properly set, please refer to https://github.com/tidyverse/lubridate/issues/928#issuecomment-720059233 .","title":"Unrecognized output timezone \"America/New_York\""},{"location":"common-errors/#unimplemented-max_no_field_types","text":"Problem You get the following error when downloading Fitbit data: Error: Unimplemented MAX_NO_FIELD_TYPES Execution halted Solution At the moment RMariaDB cannot handle MySQL columns of JSON type. Change the type of your Fitbit data column to longtext (note that the content will not change and will still be a JSON object just interpreted as a string).","title":"Unimplemented MAX_NO_FIELD_TYPES"},{"location":"common-errors/#running-rapids-on-apple-silicon-m1-mac","text":"Problem You get the following error when installing pandoc or running rapids: MoSHI/rapids/renv/staging/1/00LOCK-KernSmooth/00new/KernSmooth/libs/KernSmooth.so: mach-0, but wrong architecture Solution As of Feb 2020 in M1 macs, R needs to be installed via brew under Rosetta (x86 arch) due to some incompatibility with selected R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. x86 homebrew should be installed in /usr/local/bin/brew , you can check which brew you are using by typing which brew . Then use x86 homebrew to install R and restore RAPIDS packages ( renv_restore ).","title":"Running RAPIDS on Apple Silicon M1 Mac"},{"location":"migrating-from-old-versions/","text":"Migration guides \u00b6 Migrating from RAPIDS 0.4.x or older \u00b6 There are four actions that you need to take if you were using RAPIDS 0.4.3 or older ( before Feb 9 th , 2021 ): Check the new Overview page Check the new Overview page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. 
Deploy RAPIDS in a new folder Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder) Activate your conda environment Install renv again snakemake -j1 renv_install (for Ubuntu take advantage of the platform specific R renv instructions ) Restore renv packages snakemake -j1 renv_restore (for Ubuntu take advantage of the platform specific R renv instructions ) Move your participant files pxx.yaml to the new folder Move your time segment files to the new folder Move your .env file to the new folder Migrate your .env file to the new credentials.yaml format The .env file is not used anymore, the same credential groups are stored in credentials.yaml , migrate your .env file by running: python tools/update_format_env.py Reconfigure your config.yaml Reconfigure your config.yaml file by hand (don\u2019t copy and paste the old one). Some keys and values changed but the defaults should be compatible with the things you know from RAPIDS 0.x (see below). The most relevant changes to RAPIDS that you need to know about are: We introduced the concept of data streams RAPIDS abstracts sensor data logged by different devices, platforms and stored in different data containers as data streams . The default data stream for PHONE is aware_mysql , and the default for FITBIT is fitbitjson_mysql . This is compatible with the old functionality (AWARE and JSON Fitbit data stored in MySQL). These values are set in [PHONE_DATA_STREAMS][USE] and [FITBIT_DATA_STREAMS][USE] . You can add new data stream formats (sensing apps) and containers (database engines, file types, etc.). If you were processing your Fitbit data either in JSON or plain text (parsed) format, and it was stored in MySQL or CSV files, the changes that you made to your raw data will be compatible. Just choose fitbitjson_mysql , fitbitparsed_mysql , fitbitjson_csv , fitbitparsed_csv accordingly and set it in [FITBIT_DATA_STREAMS][USE] . 
In the future, you will not have to change your raw data; you will be able to just change column mappings/values in the data stream\u2019s format.yaml file. We introduced multiple time zones You can now process data from participants that visited multiple time zones. The default is still a single time zone (America/New_York). See how to handle multiple time zones The keyword multiple is now infer When processing data from smartphones, RAPIDS allows you to infer the OS of a smartphone by using the keyword multiple in the [PLATFORM] key of participant files. Now RAPIDS uses infer instead of multiple Nonetheless, multiple still works for backward compatibility. A global DATABASE_GROUP does not exist anymore There is no global DATABASE_GROUP anymore. Each data stream that needs credentials to connect to a database has its own DATABASE_GROUP config key . The groups are defined in credentials.yaml instead of the .env . [DEVICE_SENSOR][TABLE] is now [DEVICE_SENSOR][CONTAINER] We renamed the keys [DEVICE_SENSOR][TABLE] to [DEVICE_SENSOR][CONTAINER] to reflect that, with the introduction of data streams, they can point to a database table, file, or any other data container. Creating participant files from the AWARE_DEVICE_TABLE is deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. 
We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions to create participant files from CSV files : SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT The attributes SCR_SCRIPT and SRC_LANGUAGE of every sensor PROVIDER are replaced by SRC_SCRIPT . SRC_SCRIPT is a relative path from the RAPIDS root folder to that provider\u2019s feature script. We did this to simplify and clarify where the features scripts are stored. There are no actions to take unless you created your own feature provider; update it with your feature script path. Migrating from RAPIDS beta \u00b6 If you were relying on the old docs and the most recent version of RAPIDS you are working with is from or before Oct 13, 2020 you are using the beta version of RAPIDS. 
You can start using the RAPIDS 0.1.0 right away, just take into account the following: Deploy RAPIDS in a new folder Install a new copy of RAPIDS (the R and Python virtual environments didn\u2019t change so the cached versions will be reused) Make sure you don\u2019t skip a new Installation step to give execution permissions to the RAPIDS script: chmod +x rapids Move your old .env file Move your participant files Migrate your participant files You can migrate your old participant files to the new YAML format: python tools/update_format_participant_files.py Follow the new Configuration guide Follow the new Configuration guide Learn more about the new way to run RAPIDS Get familiar with the new way of Executing RAPIDS","title":"Migrating from an old version"},{"location":"migrating-from-old-versions/#migration-guides","text":"","title":"Migration guides"},{"location":"migrating-from-old-versions/#migrating-from-rapids-04x-or-older","text":"There are four actions that you need to take if you were using RAPIDS 0.4.3 or older ( before Feb 9 th , 2021 ): Check the new Overview page Check the new Overview page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. 
Deploy RAPIDS in a new folder Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder) Activate your conda environment Install renv again snakemake -j1 renv_install (for Ubuntu take advantage of the platform specific R renv instructions ) Restore renv packages snakemake -j1 renv_restore (for Ubuntu take advantage of the platform specific R renv instructions ) Move your participant files pxx.yaml to the new folder Move your time segment files to the new folder Move your .env file to the new folder Migrate your .env file to the new credentials.yaml format The .env file is not used anymore, the same credential groups are stored in credentials.yaml , migrate your .env file by running: python tools/update_format_env.py Reconfigure your config.yaml Reconfigure your config.yaml file by hand (don\u2019t copy and paste the old one). Some keys and values changed but the defaults should be compatible with the things you know from RAPIDS 0.x (see below). The most relevant changes to RAPIDS that you need to know about are: We introduced the concept of data streams RAPIDS abstracts sensor data logged by different devices, platforms and stored in different data containers as data streams . The default data stream for PHONE is aware_mysql , and the default for FITBIT is fitbitjson_mysql . This is compatible with the old functionality (AWARE and JSON Fitbit data stored in MySQL). These values are set in [PHONE_DATA_STREAMS][USE] and [FITBIT_DATA_STREAMS][USE] . You can add new data stream formats (sensing apps) and containers (database engines, file types, etc.). If you were processing your Fitbit data either in JSON or plain text (parsed) format, and it was stored in MySQL or CSV files, the changes that you made to your raw data will be compatible. Just choose fitbitjson_mysql , fitbitparsed_mysql , fitbitjson_csv , fitbitparsed_csv accordingly and set it in [FITBIT_DATA_STREAMS][USE] . 
In the future, you will not have to change your raw data; you will be able to just change column mappings/values in the data stream\u2019s format.yaml file. We introduced multiple time zones You can now process data from participants that visited multiple time zones. The default is still a single time zone (America/New_York). See how to handle multiple time zones The keyword multiple is now infer When processing data from smartphones, RAPIDS allows you to infer the OS of a smartphone by using the keyword multiple in the [PLATFORM] key of participant files. Now RAPIDS uses infer instead of multiple Nonetheless, multiple still works for backward compatibility. A global DATABASE_GROUP does not exist anymore There is no global DATABASE_GROUP anymore. Each data stream that needs credentials to connect to a database has its own DATABASE_GROUP config key . The groups are defined in credentials.yaml instead of the .env . [DEVICE_SENSOR][TABLE] is now [DEVICE_SENSOR][CONTAINER] We renamed the keys [DEVICE_SENSOR][TABLE] to [DEVICE_SENSOR][CONTAINER] to reflect that, with the introduction of data streams, they can point to a database table, file, or any other data container. Creating participant files from the AWARE_DEVICE_TABLE is deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. 
We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions to create participant files from CSV files : SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT The attributes SCR_SCRIPT and SRC_LANGUAGE of every sensor PROVIDER are replaced by SRC_SCRIPT . SRC_SCRIPT is a relative path from the RAPIDS root folder to that provider\u2019s feature script. We did this to simplify and clarify where the features scripts are stored. There are no actions to take unless you created your own feature provider; update it with your feature script path.","title":"Migrating from RAPIDS 0.4.x or older"},{"location":"migrating-from-old-versions/#migrating-from-rapids-beta","text":"If you were relying on the old docs and the most recent version of RAPIDS you are working with is from or before Oct 13, 2020 you are using the beta version of RAPIDS. 
You can start using the RAPIDS 0.1.0 right away, just take into account the following: Deploy RAPIDS in a new folder Install a new copy of RAPIDS (the R and Python virtual environments didn\u2019t change so the cached versions will be reused) Make sure you don\u2019t skip a new Installation step to give execution permissions to the RAPIDS script: chmod +x rapids Move your old .env file Move your participant files Migrate your participant files You can migrate your old participant files to the new YAML format: python tools/update_format_participant_files.py Follow the new Configuration guide Follow the new Configuration guide Learn more about the new way to run RAPIDS Get familiar with the new way of Executing RAPIDS","title":"Migrating from RAPIDS beta"},{"location":"team/","text":"RAPIDS Team \u00b6 If you are interested in contributing feel free to submit a pull request or contact us. Core Team \u00b6 Julio Vega (Designer and Lead Developer) \u00b6 About Julio Vega is a postdoctoral associate at the Mobile Sensing + Health Institute. He is interested in personalized methodologies to monitor chronic conditions that affect daily human behavior using mobile and wearable data. vegaju at upmc . edu Personal Website Meng Li \u00b6 About Meng Li received her Master of Science degree in Information Science from the University of Pittsburgh. She is interested in applying machine learning algorithms to the medical field. lim11 at upmc . edu Linkedin Profile Github Profile Abhineeth Reddy Kunta \u00b6 About Abhineeth Reddy Kunta is a Senior Software Engineer with the Mobile Sensing + Health Institute. He is experienced in software development and specializes in building solutions using machine learning. Abhineeth likes exploring ways to leverage technology in advancing medicine and education. Previously he worked as a Computer Programmer at Georgia Department of Public Health. He has a master\u2019s degree in Computer Science from George Mason University. 
Kwesi Aguillera \u00b6 About Kwesi Aguillera is currently in his first year at the University of Pittsburgh pursuing a Master of Sciences in Information Science specializing in Big Data Analytics. He received his Bachelor of Science degree in Computer Science and Management from the University of the West Indies. Kwesi considers himself a full stack developer and looks forward to applying this knowledge to big data analysis. Linkedin Profile Echhit Joshi \u00b6 About Echhit Joshi is a Masters student at the School of Computing and Information at University of Pittsburgh. His areas of interest are Machine/Deep Learning, Data Mining, and Analytics. Linkedin Profile Nicolas Leo \u00b6 About Nicolas is a rising senior studying computer science at the University of Pittsburgh. His academic interests include databases, machine learning, and application development. After completing his undergraduate degree, he plans to attend graduate school for an MS in Computer Science with a focus on Intelligent Systems. Nikunj Goel \u00b6 About Nik is a graduate student at the University of Pittsburgh pursuing a Master of Science in Information Science. He earned his Bachelor of Technology degree in Information Technology from India. He is a Data Enthusiast and passionate about finding the meaning out of raw data. In the long term, his goal is to create a breakthrough in Data Science and Deep Learning. Linkedin Profile Community Contributors \u00b6 Agam Kumar \u00b6 About Agam is a junior at Carnegie Mellon University studying Statistics and Machine Learning and pursuing an additional major in Computer Science. He is a member of the Data Science team in the Health and Human Performance Lab at CMU and has keen interests in software development and data science. His research interests include ML applications in medicine. Linkedin Profile Github Profile Yasaman S. 
Sefidgar \u00b6 About Linkedin Profile Joe Kim \u00b6 About Personal Website Brinnae Bent \u00b6 About Personal Website Stephen Price \u00b6 About Carnegie Mellon University Neil Singh \u00b6 About University of Virginia Advisors \u00b6 Afsaneh Doryab \u00b6 About Personal Website Carissa Low \u00b6 About Profile","title":"Team"},{"location":"team/#rapids-team","text":"If you are interested in contributing feel free to submit a pull request or contact us.","title":"RAPIDS Team"},{"location":"team/#core-team","text":"","title":"Core Team"},{"location":"team/#julio-vega-designer-and-lead-developer","text":"About Julio Vega is a postdoctoral associate at the Mobile Sensing + Health Institute. He is interested in personalized methodologies to monitor chronic conditions that affect daily human behavior using mobile and wearable data. vegaju at upmc . edu Personal Website","title":"Julio Vega (Designer and Lead Developer)"},{"location":"team/#meng-li","text":"About Meng Li received her Master of Science degree in Information Science from the University of Pittsburgh. She is interested in applying machine learning algorithms to the medical field. lim11 at upmc . edu Linkedin Profile Github Profile","title":"Meng Li"},{"location":"team/#abhineeth-reddy-kunta","text":"About Abhineeth Reddy Kunta is a Senior Software Engineer with the Mobile Sensing + Health Institute. He is experienced in software development and specializes in building solutions using machine learning. Abhineeth likes exploring ways to leverage technology in advancing medicine and education. Previously he worked as a Computer Programmer at Georgia Department of Public Health. He has a master\u2019s degree in Computer Science from George Mason University.","title":"Abhineeth Reddy Kunta"},{"location":"team/#kwesi-aguillera","text":"About Kwesi Aguillera is currently in his first year at the University of Pittsburgh pursuing a Master of Sciences in Information Science specializing in Big Data Analytics. 
He received his Bachelor of Science degree in Computer Science and Management from the University of the West Indies. Kwesi considers himself a full stack developer and looks forward to applying this knowledge to big data analysis. Linkedin Profile","title":"Kwesi Aguillera"},{"location":"team/#echhit-joshi","text":"About Echhit Joshi is a Masters student at the School of Computing and Information at University of Pittsburgh. His areas of interest are Machine/Deep Learning, Data Mining, and Analytics. Linkedin Profile","title":"Echhit Joshi"},{"location":"team/#nicolas-leo","text":"About Nicolas is a rising senior studying computer science at the University of Pittsburgh. His academic interests include databases, machine learning, and application development. After completing his undergraduate degree, he plans to attend graduate school for an MS in Computer Science with a focus on Intelligent Systems.","title":"Nicolas Leo"},{"location":"team/#nikunj-goel","text":"About Nik is a graduate student at the University of Pittsburgh pursuing a Master of Science in Information Science. He earned his Bachelor of Technology degree in Information Technology from India. He is a Data Enthusiast and passionate about finding the meaning out of raw data. In the long term, his goal is to create a breakthrough in Data Science and Deep Learning. Linkedin Profile","title":"Nikunj Goel"},{"location":"team/#community-contributors","text":"","title":"Community Contributors"},{"location":"team/#agam-kumar","text":"About Agam is a junior at Carnegie Mellon University studying Statistics and Machine Learning and pursuing an additional major in Computer Science. He is a member of the Data Science team in the Health and Human Performance Lab at CMU and has keen interests in software development and data science. His research interests include ML applications in medicine. 
Linkedin Profile Github Profile","title":"Agam Kumar"},{"location":"team/#yasaman-s-sefidgar","text":"About Linkedin Profile","title":"Yasaman S. Sefidgar"},{"location":"team/#joe-kim","text":"About Personal Website","title":"Joe Kim"},{"location":"team/#brinnae-bent","text":"About Personal Website","title":"Brinnae Bent"},{"location":"team/#stephen-price","text":"About Carnegie Mellon University","title":"Stephen Price"},{"location":"team/#neil-singh","text":"About University of Virginia","title":"Neil Singh"},{"location":"team/#advisors","text":"","title":"Advisors"},{"location":"team/#afsaneh-doryab","text":"About Personal Website","title":"Afsaneh Doryab"},{"location":"team/#carissa-low","text":"About Profile","title":"Carissa Low"},{"location":"datastreams/add-new-data-streams/","text":"Add New Data Streams \u00b6 A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . RAPIDS is agnostic to data streams\u2019 formats and container; see the Data Streams Introduction for a list of supported streams. A container is queried with an R or Python script that connects to the database, API or file where your stream\u2019s raw data is stored. A format is described using a format.yaml file that specifies how to map and mutate your stream\u2019s raw data to match the data and format RAPIDS needs. The most common cases when you would want to implement a new data stream are: You collected data with a mobile sensing app RAPIDS does not support yet. For example, Beiwe data stored in MySQL. You will need to define a new format file and a new container script. You collected data with a mobile sensing app RAPIDS supports, but this data is stored in a container that RAPIDS can\u2019t connect to yet. For example, AWARE data stored in PostgreSQL. In this case, you can reuse the format file of the aware_mysql stream, but you will need to implement a new container script. 
Hint Both the container.[R|py] and the format.yaml are stored in ./src/data/streams/[stream_name] where [stream_name] can be aware_mysql for example. Implement a Container \u00b6 The container script of a data stream can be implemented in R (strongly recommended) or python. This script must have two functions if you are implementing a stream for phone data or one function otherwise. The script can contain other auxiliary functions. First of all, add any parameters your script might need in config.yaml under (device)_DATA_STREAMS . These parameters will be available in the stream_parameters argument of the one or two functions you implement. For example, if you are adding support for Beiwe data stored in PostgreSQL and your container needs a set of credentials to connect to a database, your new data stream configuration would be: PHONE_DATA_STREAMS : USE : aware_python # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP beiwe_postgresql : DATABASE_GROUP : MY_GROUP # users define this group (user, password, host, etc.) in credentials.yaml Then implement one or both of the following functions: pull_data This function returns the data columns for a specific sensor and participant. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] sensor_container The value set by the user in any [DEVICE_SENSOR][CONTAINER] key of config.yaml . It can be a table, file path, or whatever data source you want to support that contains the data from a single sensor for all participants . For example, [PHONE_ACCELEROMETER][CONTAINER] device The device id that you need to get the data for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid columns A list of the columns that you need to get from sensor_container . 
You specify these columns in your stream\u2019s format.yaml Example This is the pull_data function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. pull_data <- function ( stream_parameters , device , sensor_container , columns ){ # get_db_engine is an auxiliary function not shown here for brevity bu can be found in src/data/streams/aware_mysql/container.R dbEngine <- get_db_engine ( stream_parameters $ DATABASE_GROUP ) query <- paste0 ( \"SELECT \" , paste ( columns , collapse = \",\" ), \" FROM \" , sensor_container , \" WHERE device_id = '\" , device , \"'\" ) # Letting the user know what we are doing message ( paste0 ( \"Executing the following query to download data: \" , query )) sensor_data <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( sensor_data ) == 0 ) warning ( paste ( \"The device '\" , device , \"' did not have data in \" , sensor_container )) return ( sensor_data ) } infer_device_os Warning This function is only necessary for phone data streams. RAPIDS allows users to use the keyword infer (previously multiple ) to automatically infer the mobile Operative System a phone was running. If you have a way to infer the OS of a device id, implement this function. For example, for AWARE data we use the aware_device table. If you don\u2019t have a way to infer the OS, call stop(\"Error Message\") so other users know they can\u2019t use infer or the inference failed, and they have to assign the OS manually in the participant file. This function returns the operative system ( android or ios ) for a specific phone device id. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] device The device id that you need to infer the OS for (this is set by the user in the participant files ). 
For example, in AWARE this device id is a uuid Example This is the infer_device_os function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. infer_device_os <- function ( stream_parameters , device ){ # get_db_engine is an auxiliary function not shown here for brevity bu can be found in src/data/streams/aware_mysql/container.R group <- stream_parameters $ DATABASE_GROUP dbEngine <- dbConnect ( MariaDB (), default.file = \"./.env\" , group = group ) query <- paste0 ( \"SELECT device_id,brand FROM aware_device WHERE device_id = '\" , device , \"'\" ) message ( paste0 ( \"Executing the following query to infer phone OS: \" , query )) os <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( os ) > 0 ) return ( os %>% mutate ( os = ifelse ( brand == \"iPhone\" , \"ios\" , \"android\" )) %>% pull ( os )) else stop ( paste ( \"We cannot infer the OS of the following device id because it does not exist in the aware_device table:\" , device )) return ( os ) } Implement a Format \u00b6 A format file format.yaml describes the mapping between your stream\u2019s raw data and the data that RAPIDS needs. This file has a section per sensor (e.g. PHONE_ACCELEROMETER ), and each section has two attributes (keys): RAPIDS_COLUMN_MAPPINGS are mappings between the columns RAPIDS needs and the columns your raw data already has. The reserved keyword FLAG_TO_MUTATE flags columns that RAPIDS requires but that are not initially present in your container (database, CSV file). These columns have to be created by your mutation scripts. MUTATION . Sometimes your raw data needs to be transformed to match the format RAPIDS can handle (including creating columns marked as FLAG_TO_MUTATE ) COLUMN_MAPPINGS are mappings between the columns a mutation SCRIPT needs and the columns your raw data has. SCRIPTS are a collection of R or Python scripts that transform one or more raw data columns into the format RAPIDS needs. 
Hint [RAPIDS_COLUMN_MAPPINGS] and [MUTATE][COLUMN_MAPPINGS] have a key (left-hand side string) and a value (right-hand side string). The values are the names used to pull columns from a container (e.g., columns in a database table). All values are renamed to their keys in lower case. The renamed columns are sent to every mutation script within the data argument, and the final output is the input RAPIDS processes further. For example, let\u2019s assume we are implementing beiwe_mysql and defining the following format for PHONE_FAKESENSOR : PHONE_FAKESENSOR : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID MAGNITUDE_SQUARED : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : MAGNITUDE : beiwe_value SCRIPTS : - src/data/streams/mutations/phone/square_magnitude.py RAPIDS will: Download beiwe_timestamp , beiwe_deviceID , and beiwe_value from the container of beiwe_mysql (MySQL DB) Rename these columns to timestamp , device_id , and magnitude , respectively. Execute square_magnitude.py with a data frame as an argument containing the renamed columns. This script will square magnitude and rename it to magnitude_squared Verify the data frame returned by square_magnitude.py has the columns RAPIDS needs timestamp , device_id , and magnitude_squared . Use this data frame as the input to be processed in the pipeline. Note that although RAPIDS_COLUMN_MAPPINGS and [MUTATE][COLUMN_MAPPINGS] keys are in capital letters for readability (e.g. MAGNITUDE_SQUARED ), the names of the final columns you mutate in your scripts should be lower case. Let\u2019s explain this column mapping in more depth with examples. Name mapping \u00b6 The mapping for some sensors is straightforward. For example, accelerometer data most of the time has a timestamp, three axes (x,y,z), and a device id that produced it. AWARE and a different sensing app like Beiwe likely logged accelerometer data in the same way but with different column names. 
In this case, we only need to match Beiwe data columns to RAPIDS columns one-to-one: PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : beiwe_x DOUBLE_VALUES_1 : beiwe_y DOUBLE_VALUES_2 : beiwe_z MUTATE : COLUMN_MAPPINGS : SCRIPTS : # it's ok if this is empty Value mapping \u00b6 For some sensors, we need to map column names and values. For example, screen data has ON and OFF events; let\u2019s suppose Beiwe represents an ON event with the number 1, but RAPIDS identifies ON events with the number 2 . In this case, we need to mutate the raw data coming from Beiwe and replace all 1 s with 2 s. We do this by listing one or more R or Python scripts in MUTATION_SCRIPTS that will be executed in order. We usually store all mutation scripts under src/data/streams/mutations/[device]/[platform]/ and they can be reused across data streams. PHONE_SCREEN : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID EVENT : beiwe_event MUTATE : COLUMN_MAPPINGS : SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_screen_map.py Hint A MUTATION_SCRIPT can also be used to clean/preprocess your data before extracting behavioral features. A mutation script has to have a main function that receives two arguments, data and stream_parameters . The stream_parameters argument contains the config.yaml key/values of your data stream (this is the same argument that your container.[py|R] script receives, see Implement a Container ). python Example of a python mutation script import pandas as pd def main ( data , stream_parameters ): # mutate data return ( data ) R Example of a R mutation script source ( \"renv/activate.R\" ) # needed to use RAPIDS renv environment library ( dplyr ) main <- function ( data , stream_parameters ){ # mutate data return ( data ) } Complex mapping \u00b6 Sometimes, your raw data doesn\u2019t even have the same columns RAPIDS expects for a sensor. 
For example, let\u2019s pretend Beiwe stores PHONE_ACCELEROMETER axis data in a single column called acc_col instead of three. You have to create a MUTATION_SCRIPT to split acc_col into three columns x , y , and z . For this, you mark the three axes columns RAPIDS needs in [RAPIDS_COLUMN_MAPPINGS] with the word FLAG_TO_MUTATE , map acc_col in [MUTATION][COLUMN_MAPPINGS] , and list a Python script under [MUTATION][SCRIPTS] with the code to split acc_col . See an example below. RAPIDS expects that every column mapped as FLAG_TO_MUTATE will be generated by your mutation script, so it won\u2019t try to retrieve them from your container (database, CSV file, etc.). In our example, acc_col will be fetched from the stream\u2019s container and renamed to JOINED_AXES because beiwe_split_acc.py will split it into double_values_0 , double_values_1 , and double_values_2 . PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : FLAG_TO_MUTATE DOUBLE_VALUES_1 : FLAG_TO_MUTATE DOUBLE_VALUES_2 : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : JOINED_AXES : acc_col SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_split_acc.py This is a draft of beiwe_split_acc.py MUTATION_SCRIPT : import pandas as pd def main ( data , stream_parameters ): # data has the acc_col # split acc_col into three columns: double_values_0, double_values_1, double_values_2 to match RAPIDS format # remove acc_col since we don't need it anymore return ( data ) OS complex mapping \u00b6 There is a special case for a complex mapping scenario for smartphone data streams. The Android and iOS sensor APIs return data in different formats for certain sensors (like screen, activity recognition, battery, among others). In case you didn\u2019t notice, the examples we have used so far are grouped under an ANDROID key, which means they will be applied to data collected by Android phones. 
Additionally, each sensor has an IOS key for a similar purpose. We use the complex mapping described above to transform iOS data into an Android format (it\u2019s always iOS to Android and any new phone data stream must do the same). For example, this is the format.yaml key for PHONE_ACTIVITY_RECOGNITION . Note that the ANDROID mapping is simple (one-to-one) but the IOS mapping is complex with three FLAG_TO_MUTATE columns, two [MUTATE][COLUMN_MAPPINGS] mappings, and one [MUTATION][SCRIPT] . PHONE_ACTIVITY_RECOGNITION : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : activity_type ACTIVITY_NAME : activity_name CONFIDENCE : confidence MUTATION : COLUMN_MAPPINGS : SCRIPTS : IOS : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : FLAG_TO_MUTATE ACTIVITY_NAME : FLAG_TO_MUTATE CONFIDENCE : FLAG_TO_MUTATE MUTATION : COLUMN_MAPPINGS : ACTIVITIES : activities CONFIDENCE : confidence SCRIPTS : - \"src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R\" Example activity_recogniton_ios_unification.R In this MUTATION_SCRIPT we create ACTIVITY_NAME and ACTIVITY_TYPE based on activities , and map confidence iOS values to Android values. 
source ( \"renv/activate.R\" ) library ( \"dplyr\" , warn.conflicts = F ) library ( stringr ) clean_ios_activity_column <- function ( ios_gar ){ ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = '(\"|\\\\[|\\\\])' , replacement = \"\" )) existent_multiple_activities <- ios_gar %>% filter ( str_detect ( activities , \",\" )) %>% group_by ( activities ) %>% summarise ( mutiple_activities = unique ( activities ), .groups = \"drop_last\" ) %>% pull ( mutiple_activities ) known_multiple_activities <- c ( \"stationary,automotive\" ) unkown_multiple_actvities <- setdiff ( existent_multiple_activities , known_multiple_activities ) if ( length ( unkown_multiple_actvities ) > 0 ){ stop ( paste0 ( \"There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: \" , unkown_multiple_actvities )) } ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = \"stationary,automotive\" , replacement = \"automotive\" )) return ( ios_gar ) } unify_ios_activity_recognition <- function ( ios_gar ){ # We only need to unify Google Activity Recognition data for iOS # discard rows where activities column is blank ios_gar <- ios_gar [ - which ( ios_gar $ activities == \"\" ), ] # clean \"activities\" column of ios_gar ios_gar <- clean_ios_activity_column ( ios_gar ) # make it compatible with android version: generate \"activity_name\" and \"activity_type\" columns ios_gar <- ios_gar %>% mutate ( activity_name = case_when ( activities == \"automotive\" ~ \"in_vehicle\" , activities == \"cycling\" ~ \"on_bicycle\" , activities == \"walking\" ~ \"walking\" , activities == \"running\" ~ \"running\" , activities == \"stationary\" ~ \"still\" ), activity_type = case_when ( activities == \"automotive\" ~ 0 , activities == \"cycling\" ~ 1 , activities == \"walking\" ~ 7 , activities == \"running\" ~ 8 , activities == \"stationary\" ~ 3 , activities == \"unknown\" ~ 4 ), confidence = case_when ( 
confidence == 0 ~ 0 , confidence == 1 ~ 50 , confidence == 2 ~ 100 ) ) %>% select ( - activities ) return ( ios_gar ) } main <- function ( data , stream_parameters ){ return ( unify_ios_activity_recognition ( data , stream_parameters )) }","title":"Add New Data Streams"},{"location":"datastreams/add-new-data-streams/#add-new-data-streams","text":"A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . RAPIDS is agnostic to data streams\u2019 formats and container; see the Data Streams Introduction for a list of supported streams. A container is queried with an R or Python script that connects to the database, API or file where your stream\u2019s raw data is stored. A format is described using a format.yaml file that specifies how to map and mutate your stream\u2019s raw data to match the data and format RAPIDS needs. The most common cases when you would want to implement a new data stream are: You collected data with a mobile sensing app RAPIDS does not support yet. For example, Beiwe data stored in MySQL. You will need to define a new format file and a new container script. You collected data with a mobile sensing app RAPIDS supports, but this data is stored in a container that RAPIDS can\u2019t connect to yet. For example, AWARE data stored in PostgreSQL. In this case, you can reuse the format file of the aware_mysql stream, but you will need to implement a new container script. Hint Both the container.[R|py] and the format.yaml are stored in ./src/data/streams/[stream_name] where [stream_name] can be aware_mysql for example.","title":"Add New Data Streams"},{"location":"datastreams/add-new-data-streams/#implement-a-container","text":"The container script of a data stream can be implemented in R (strongly recommended) or python. This script must have two functions if you are implementing a stream for phone data or one function otherwise. 
The script can contain other auxiliary functions. First of all, add any parameters your script might need in config.yaml under (device)_DATA_STREAMS . These parameters will be available in the stream_parameters argument of the one or two functions you implement. For example, if you are adding support for Beiwe data stored in PostgreSQL and your container needs a set of credentials to connect to a database, your new data stream configuration would be: PHONE_DATA_STREAMS : USE : aware_python # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP beiwe_postgresql : DATABASE_GROUP : MY_GROUP # users define this group (user, password, host, etc.) in credentials.yaml Then implement one or both of the following functions: pull_data This function returns the data columns for a specific sensor and participant. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] sensor_container The value set by the user in any [DEVICE_SENSOR][CONTAINER] key of config.yaml . It can be a table, file path, or whatever data source you want to support that contains the data from a single sensor for all participants . For example, [PHONE_ACCELEROMETER][CONTAINER] device The device id that you need to get the data for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid columns A list of the columns that you need to get from sensor_container . You specify these columns in your stream\u2019s format.yaml Example This is the pull_data function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. 
pull_data <- function ( stream_parameters , device , sensor_container , columns ){ # get_db_engine is an auxiliary function not shown here for brevity bu can be found in src/data/streams/aware_mysql/container.R dbEngine <- get_db_engine ( stream_parameters $ DATABASE_GROUP ) query <- paste0 ( \"SELECT \" , paste ( columns , collapse = \",\" ), \" FROM \" , sensor_container , \" WHERE device_id = '\" , device , \"'\" ) # Letting the user know what we are doing message ( paste0 ( \"Executing the following query to download data: \" , query )) sensor_data <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( sensor_data ) == 0 ) warning ( paste ( \"The device '\" , device , \"' did not have data in \" , sensor_container )) return ( sensor_data ) } infer_device_os Warning This function is only necessary for phone data streams. RAPIDS allows users to use the keyword infer (previously multiple ) to automatically infer the mobile Operative System a phone was running. If you have a way to infer the OS of a device id, implement this function. For example, for AWARE data we use the aware_device table. If you don\u2019t have a way to infer the OS, call stop(\"Error Message\") so other users know they can\u2019t use infer or the inference failed, and they have to assign the OS manually in the participant file. This function returns the operative system ( android or ios ) for a specific phone device id. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] device The device id that you need to infer the OS for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid Example This is the infer_device_os function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. 
infer_device_os <- function ( stream_parameters , device ){ # get_db_engine is an auxiliary function not shown here for brevity bu can be found in src/data/streams/aware_mysql/container.R group <- stream_parameters $ DATABASE_GROUP dbEngine <- dbConnect ( MariaDB (), default.file = \"./.env\" , group = group ) query <- paste0 ( \"SELECT device_id,brand FROM aware_device WHERE device_id = '\" , device , \"'\" ) message ( paste0 ( \"Executing the following query to infer phone OS: \" , query )) os <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( os ) > 0 ) return ( os %>% mutate ( os = ifelse ( brand == \"iPhone\" , \"ios\" , \"android\" )) %>% pull ( os )) else stop ( paste ( \"We cannot infer the OS of the following device id because it does not exist in the aware_device table:\" , device )) return ( os ) }","title":"Implement a Container"},{"location":"datastreams/add-new-data-streams/#implement-a-format","text":"A format file format.yaml describes the mapping between your stream\u2019s raw data and the data that RAPIDS needs. This file has a section per sensor (e.g. PHONE_ACCELEROMETER ), and each section has two attributes (keys): RAPIDS_COLUMN_MAPPINGS are mappings between the columns RAPIDS needs and the columns your raw data already has. The reserved keyword FLAG_TO_MUTATE flags columns that RAPIDS requires but that are not initially present in your container (database, CSV file). These columns have to be created by your mutation scripts. MUTATION . Sometimes your raw data needs to be transformed to match the format RAPIDS can handle (including creating columns marked as FLAG_TO_MUTATE ) COLUMN_MAPPINGS are mappings between the columns a mutation SCRIPT needs and the columns your raw data has. SCRIPTS are a collection of R or Python scripts that transform one or more raw data columns into the format RAPIDS needs. 
Hint [RAPIDS_COLUMN_MAPPINGS] and [MUTATE][COLUMN_MAPPINGS] have a key (left-hand side string) and a value (right-hand side string). The values are the names used to pull columns from a container (e.g., columns in a database table). All values are renamed to their keys in lower case. The renamed columns are sent to every mutation script within the data argument, and the final output is the input RAPIDS processes further. For example, let\u2019s assume we are implementing beiwe_mysql and defining the following format for PHONE_FAKESENSOR : PHONE_FAKESENSOR : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID MAGNITUDE_SQUARED : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : MAGNITUDE : beiwe_value SCRIPTS : - src/data/streams/mutations/phone/square_magnitude.py RAPIDS will: Download beiwe_timestamp , beiwe_deviceID , and beiwe_value from the container of beiwe_mysql (MySQL DB) Rename these columns to timestamp , device_id , and magnitude , respectively. Execute square_magnitude.py with a data frame as an argument containing the renamed columns. This script will square magnitude and rename it to magnitude_squared Verify the data frame returned by square_magnitude.py has the columns RAPIDS needs timestamp , device_id , and magnitude_squared . Use this data frame as the input to be processed in the pipeline. Note that although RAPIDS_COLUMN_MAPPINGS and [MUTATE][COLUMN_MAPPINGS] keys are in capital letters for readability (e.g. MAGNITUDE_SQUARED ), the names of the final columns you mutate in your scripts should be lower case. Let\u2019s explain this column mapping in more depth with examples.","title":"Implement a Format"},{"location":"datastreams/add-new-data-streams/#name-mapping","text":"The mapping for some sensors is straightforward. For example, accelerometer data most of the time has a timestamp, three axes (x,y,z), and a device id that produced it. 
AWARE and a different sensing app like Beiwe likely logged accelerometer data in the same way but with different column names. In this case, we only need to match Beiwe data columns to RAPIDS columns one-to-one: PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : beiwe_x DOUBLE_VALUES_1 : beiwe_y DOUBLE_VALUES_2 : beiwe_z MUTATE : COLUMN_MAPPINGS : SCRIPTS : # it's ok if this is empty","title":"Name mapping"},{"location":"datastreams/add-new-data-streams/#value-mapping","text":"For some sensors, we need to map column names and values. For example, screen data has ON and OFF events; let\u2019s suppose Beiwe represents an ON event with the number 1, but RAPIDS identifies ON events with the number 2 . In this case, we need to mutate the raw data coming from Beiwe and replace all 1 s with 2 s. We do this by listing one or more R or Python scripts in MUTATION_SCRIPTS that will be executed in order. We usually store all mutation scripts under src/data/streams/mutations/[device]/[platform]/ and they can be reused across data streams. PHONE_SCREEN : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID EVENT : beiwe_event MUTATE : COLUMN_MAPPINGS : SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_screen_map.py Hint A MUTATION_SCRIPT can also be used to clean/preprocess your data before extracting behavioral features. A mutation script has to have a main function that receives two arguments, data and stream_parameters . The stream_parameters argument contains the config.yaml key/values of your data stream (this is the same argument that your container.[py|R] script receives, see Implement a Container ). 
python Example of a python mutation script import pandas as pd def main ( data , stream_parameters ): # mutate data return ( data ) R Example of a R mutation script source ( \"renv/activate.R\" ) # needed to use RAPIDS renv environment library ( dplyr ) main <- function ( data , stream_parameters ){ # mutate data return ( data ) }","title":"Value mapping"},{"location":"datastreams/add-new-data-streams/#complex-mapping","text":"Sometimes, your raw data doesn\u2019t even have the same columns RAPIDS expects for a sensor. For example, let\u2019s pretend Beiwe stores PHONE_ACCELEROMETER axis data in a single column called acc_col instead of three. You have to create a MUTATION_SCRIPT to split acc_col into three columns x , y , and z . For this, you mark the three axes columns RAPIDS needs in [RAPIDS_COLUMN_MAPPINGS] with the word FLAG_TO_MUTATE , map acc_col in [MUTATION][COLUMN_MAPPINGS] , and list a Python script under [MUTATION][SCRIPTS] with the code to split acc_col . See an example below. RAPIDS expects that every column mapped as FLAG_TO_MUTATE will be generated by your mutation script, so it won\u2019t try to retrieve them from your container (database, CSV file, etc.). In our example, acc_col will be fetched from the stream\u2019s container and renamed to JOINED_AXES because beiwe_split_acc.py will split it into double_values_0 , double_values_1 , and double_values_2 . 
PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : FLAG_TO_MUTATE DOUBLE_VALUES_1 : FLAG_TO_MUTATE DOUBLE_VALUES_2 : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : JOINED_AXES : acc_col SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_split_acc.py This is a draft of beiwe_split_acc.py MUTATION_SCRIPT : import pandas as pd def main ( data , stream_parameters ): # data has the acc_col # split acc_col into three columns: double_values_0, double_values_1, double_values_2 to match RAPIDS format # remove acc_col since we don't need it anymore return ( data )","title":"Complex mapping"},{"location":"datastreams/add-new-data-streams/#os-complex-mapping","text":"There is a special case for a complex mapping scenario for smartphone data streams. The Android and iOS sensor APIs return data in different formats for certain sensors (like screen, activity recognition, battery, among others). In case you didn\u2019t notice, the examples we have used so far are grouped under an ANDROID key, which means they will be applied to data collected by Android phones. Additionally, each sensor has an IOS key for a similar purpose. We use the complex mapping described above to transform iOS data into an Android format (it\u2019s always iOS to Android and any new phone data stream must do the same). For example, this is the format.yaml key for PHONE_ACTIVITY_RECOGNITION . Note that the ANDROID mapping is simple (one-to-one) but the IOS mapping is complex with three FLAG_TO_MUTATE columns, two [MUTATE][COLUMN_MAPPINGS] mappings, and one [MUTATION][SCRIPT] . 
PHONE_ACTIVITY_RECOGNITION : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : activity_type ACTIVITY_NAME : activity_name CONFIDENCE : confidence MUTATION : COLUMN_MAPPINGS : SCRIPTS : IOS : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : FLAG_TO_MUTATE ACTIVITY_NAME : FLAG_TO_MUTATE CONFIDENCE : FLAG_TO_MUTATE MUTATION : COLUMN_MAPPINGS : ACTIVITIES : activities CONFIDENCE : confidence SCRIPTS : - \"src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R\" Example activity_recogniton_ios_unification.R In this MUTATION_SCRIPT we create ACTIVITY_NAME and ACTIVITY_TYPE based on activities , and map confidence iOS values to Android values. source ( \"renv/activate.R\" ) library ( \"dplyr\" , warn.conflicts = F ) library ( stringr ) clean_ios_activity_column <- function ( ios_gar ){ ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = '(\"|\\\\[|\\\\])' , replacement = \"\" )) existent_multiple_activities <- ios_gar %>% filter ( str_detect ( activities , \",\" )) %>% group_by ( activities ) %>% summarise ( mutiple_activities = unique ( activities ), .groups = \"drop_last\" ) %>% pull ( mutiple_activities ) known_multiple_activities <- c ( \"stationary,automotive\" ) unkown_multiple_actvities <- setdiff ( existent_multiple_activities , known_multiple_activities ) if ( length ( unkown_multiple_actvities ) > 0 ){ stop ( paste0 ( \"There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: \" , unkown_multiple_actvities )) } ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = \"stationary,automotive\" , replacement = \"automotive\" )) return ( ios_gar ) } unify_ios_activity_recognition <- function ( ios_gar ){ # We only need to unify Google Activity Recognition data for iOS # discard rows where activities column is blank ios_gar <- ios_gar [ - which ( ios_gar 
$ activities == \"\" ), ] # clean \"activities\" column of ios_gar ios_gar <- clean_ios_activity_column ( ios_gar ) # make it compatible with android version: generate \"activity_name\" and \"activity_type\" columns ios_gar <- ios_gar %>% mutate ( activity_name = case_when ( activities == \"automotive\" ~ \"in_vehicle\" , activities == \"cycling\" ~ \"on_bicycle\" , activities == \"walking\" ~ \"walking\" , activities == \"running\" ~ \"running\" , activities == \"stationary\" ~ \"still\" ), activity_type = case_when ( activities == \"automotive\" ~ 0 , activities == \"cycling\" ~ 1 , activities == \"walking\" ~ 7 , activities == \"running\" ~ 8 , activities == \"stationary\" ~ 3 , activities == \"unknown\" ~ 4 ), confidence = case_when ( confidence == 0 ~ 0 , confidence == 1 ~ 50 , confidence == 2 ~ 100 ) ) %>% select ( - activities ) return ( ios_gar ) } main <- function ( data , stream_parameters ){ return ( unify_ios_activity_recognition ( data , stream_parameters )) }","title":"OS complex mapping"},{"location":"datastreams/aware-csv/","text":"aware_csv \u00b6 This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in CSV files. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . 
See examples in the CSV files inside rapids_example_csv.zip Example of a valid CSV file \"_id\",\"timestamp\",\"device_id\",\"activities\",\"confidence\",\"stationary\",\"walking\",\"running\",\"automotive\",\"cycling\",\"unknown\",\"label\" 1,1587528000000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"\" 2,1587528060000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 3,1587528120000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 4,1587528180000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 5,1587528240000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 6,1587528300000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 7,1587528360000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" Container \u00b6 A CSV file per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_csv/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_csv"},{"location":"datastreams/aware-csv/#aware_csv","text":"This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in CSV files. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . See examples in the CSV files inside rapids_example_csv.zip Example of a valid CSV file \"_id\",\"timestamp\",\"device_id\",\"activities\",\"confidence\",\"stationary\",\"walking\",\"running\",\"automotive\",\"cycling\",\"unknown\",\"label\" 1,1587528000000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"\" 2,1587528060000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 3,1587528120000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 4,1587528180000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 5,1587528240000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 6,1587528300000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 7,1587528360000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\"","title":"aware_csv"},{"location":"datastreams/aware-csv/#container","text":"A CSV file per sensor, each containing 
the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_csv/container.R","title":"Container"},{"location":"datastreams/aware-csv/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set 
ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. 
We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. 
PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/aware-influxdb/","text":"aware_influxdb (beta) \u00b6 Warning This data stream is being released in beta while we test it thoroughly. This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in an InfluxDB database. Container \u00b6 An InfluxDB database with a table per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_influxdb/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_influxdb (beta)"},{"location":"datastreams/aware-influxdb/#aware_influxdb-beta","text":"Warning This data stream is being released in beta while we test it thoroughly. This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in an InfluxDB database.","title":"aware_influxdb (beta)"},{"location":"datastreams/aware-influxdb/#container","text":"An InfluxDB database with a table per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_influxdb/container.R","title":"Container"},{"location":"datastreams/aware-influxdb/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/aware-mysql/","text":"aware_mysql \u00b6 This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in a MySQL database. Container \u00b6 A MySQL database with a table per sensor, each containing the data for all participants. This is the default database created by the old PHP AWARE server (as opposed to the new JavaScript Micro server). The script to connect and download data from this container is at: src/data/streams/aware_mysql/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_mysql"},{"location":"datastreams/aware-mysql/#aware_mysql","text":"This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in a MySQL database.","title":"aware_mysql"},{"location":"datastreams/aware-mysql/#container","text":"A MySQL database with a table per sensor, each containing the data for all participants. This is the default database created by the old PHP AWARE server (as opposed to the new JavaScript Micro server). The script to connect and download data from this container is at: src/data/streams/aware_mysql/container.R","title":"Container"},{"location":"datastreams/aware-mysql/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_mysql/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) use the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/data-streams-introduction/","text":"Data Streams Introduction \u00b6 A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Similarly, smartphone data collected with Beiwe will have a different format and could be stored in a container like a PostgreSQL database or a CSV file. If you want to process a data stream using RAPIDS, make sure that your data is stored in a supported format and container (see table below). If RAPIDS doesn\u2019t support your data stream yet (e.g. Beiwe data stored in PostgreSQL, or AWARE data stored in SQLite), you can always implement a new data stream . If it\u2019s something you think other people might be interested in, we will be happy to include your new data stream in RAPIDS, so get in touch! Hint Currently, you can add new data streams for smartphones, Fitbit, and Empatica devices. If you need RAPIDS to process data from other devices , like Oura Rings or Actigraph wearables, get in touch. It is a more complicated process that could take a couple of days to implement for someone familiar with R or Python, but we would be happy to work on it together. 
For reference, these are the data streams we currently support: Data Stream Device Format Container Docs aware_mysql Phone AWARE app MySQL link aware_csv Phone AWARE app CSV files link aware_influxdb (beta) Phone AWARE app InfluxDB link fitbitjson_mysql Fitbit JSON (per Fitbit\u2019s API ) MySQL link fitbitjson_csv Fitbit JSON (per Fitbit\u2019s API ) CSV files link fitbitparsed_mysql Fitbit Parsed (parsed API data) MySQL link fitbitparsed_csv Fitbit Parsed (parsed API data) CSV files link empatica_zip Empatica E4 Connect ZIP files link","title":"Introduction"},{"location":"datastreams/data-streams-introduction/#data-streams-introduction","text":"A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Similarly, smartphone data collected with Beiwe will have a different format and could be stored in a container like a PostgreSQL database or a CSV file. If you want to process a data stream using RAPIDS, make sure that your data is stored in a supported format and container (see table below). If RAPIDS doesn\u2019t support your data stream yet (e.g. Beiwe data stored in PostgreSQL, or AWARE data stored in SQLite), you can always implement a new data stream . If it\u2019s something you think other people might be interested in, we will be happy to include your new data stream in RAPIDS, so get in touch! Hint Currently, you can add new data streams for smartphones, Fitbit, and Empatica devices. If you need RAPIDS to process data from other devices , like Oura Rings or Actigraph wearables, get in touch. It is a more complicated process that could take a couple of days to implement for someone familiar with R or Python, but we would be happy to work on it together. 
For reference, these are the data streams we currently support: Data Stream Device Format Container Docs aware_mysql Phone AWARE app MySQL link aware_csv Phone AWARE app CSV files link aware_influxdb (beta) Phone AWARE app InfluxDB link fitbitjson_mysql Fitbit JSON (per Fitbit\u2019s API ) MySQL link fitbitjson_csv Fitbit JSON (per Fitbit\u2019s API ) CSV files link fitbitparsed_mysql Fitbit Parsed (parsed API data) MySQL link fitbitparsed_csv Fitbit Parsed (parsed API data) CSV files link empatica_zip Empatica E4 Connect ZIP files link","title":"Data Streams Introduction"},{"location":"datastreams/empatica-zip/","text":"empatica_zip \u00b6 This data stream handles Empatica sensor data downloaded as zip files using the E4 Connect . Container \u00b6 You need to create a subfolder for every participant named after their device id inside the folder specified by [EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER] . You can add one or more Empatica zip files to any subfolder. The script to connect and download data from this container is at: src/data/streams/empatica_zip/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Empatica sensors . This file is at: src/data/streams/empatica_zip/format.yaml All columns are mutated from the raw data in the zip files so you don\u2019t need to modify any column mappings. 
EMPATICA_ACCELEROMETER RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_HEARTRATE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id HEARTRATE heartrate MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_TEMPERATURE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TEMPERATURE temperature MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ELECTRODERMAL_ACTIVITY electrodermal_activity MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_BLOOD_VOLUME_PULSE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BLOOD_VOLUME_PULSE blood_volume_pulse MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_INTER_BEAT_INTERVAL RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id INTER_BEAT_INTERVAL inter_beat_interval MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_EMPATICA_TAGS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TAGS tags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None)","title":"empatica_zip"},{"location":"datastreams/empatica-zip/#empatica_zip","text":"This data stream handles Empatica sensor data downloaded as zip files using the E4 Connect .","title":"empatica_zip"},{"location":"datastreams/empatica-zip/#container","text":"You need to create a subfolder for every participant named after their device id inside the folder specified by [EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER] . You can add one or more Empatica zip files to any subfolder. 
The script to connect and download data from this container is at: src/data/streams/empatica_zip/container.R","title":"Container"},{"location":"datastreams/empatica-zip/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Empatica sensors . This file is at: src/data/streams/empatica_zip/format.yaml All columns are mutated from the raw data in the zip files so you don\u2019t need to modify any column mappings. EMPATICA_ACCELEROMETER RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_HEARTRATE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id HEARTRATE heartrate MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_TEMPERATURE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TEMPERATURE temperature MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ELECTRODERMAL_ACTIVITY electrodermal_activity MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_BLOOD_VOLUME_PULSE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BLOOD_VOLUME_PULSE blood_volume_pulse MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_INTER_BEAT_INTERVAL RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id INTER_BEAT_INTERVAL inter_beat_interval MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_EMPATICA_TAGS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TAGS tags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None)","title":"Format"},{"location":"datastreams/fitbitjson-csv/","text":"fitbitjson_csv \u00b6 This 
data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your sensor data in a CSV file, RAPIDS can process it. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"timestamp\",\"device_id\",\"label\",\"fitbit_id\",\"fitbit_data_type\",\"fitbit_data\" 1587614400000,\"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",\"5S\",\"5ZKN9B\",\"steps\",\"{\\\"activities-steps\\\":[{\\\"dateTime\\\":\\\"2020-04-23\\\",\\\"value\\\":\\\"7881\\\"}]\" Container \u00b6 The container should be a CSV file per Fitbit sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_csv/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"fitbitjson_csv"},{"location":"datastreams/fitbitjson-csv/#fitbitjson_csv","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a CSV file, RAPIDS can process it. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"timestamp\",\"device_id\",\"label\",\"fitbit_id\",\"fitbit_data_type\",\"fitbit_data\" 1587614400000,\"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",\"5S\",\"5ZKN9B\",\"steps\",\"{\\\"activities-steps\\\":[{\\\"dateTime\\\":\\\"2020-04-23\\\",\\\"value\\\":\\\"7881\\\"}]\"","title":"fitbitjson_csv"},{"location":"datastreams/fitbitjson-csv/#container","text":"The container should be a CSV file per Fitbit sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_csv/container.R","title":"Container"},{"location":"datastreams/fitbitjson-csv/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Format"},{"location":"datastreams/fitbitjson-mysql/","text":"fitbitjson_mysql \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a MySQL database, RAPIDS can process it. Container \u00b6 The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_mysql/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_mysql/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"fitbitjson_mysql"},{"location":"datastreams/fitbitjson-mysql/#fitbitjson_mysql","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a MySQL database, RAPIDS can process it.","title":"fitbitjson_mysql"},{"location":"datastreams/fitbitjson-mysql/#container","text":"The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_mysql/container.R","title":"Container"},{"location":"datastreams/fitbitjson-mysql/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_mysql/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Format"},{"location":"datastreams/fitbitparsed-csv/","text":"fitbitparsed_csv \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your parsed sensor data in a CSV file, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"device_id\",\"heartrate\",\"heartrate_zone\",\"local_date_time\",\"timestamp\" \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:00:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:01:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",67,\"outofrange\",\"2020-04-23 00:02:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:03:00\",0 Container \u00b6 The container should be a CSV file per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_csv/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_csv/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just keep in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"fitbitparsed_csv"},{"location":"datastreams/fitbitparsed-csv/#fitbitparsed_csv","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a CSV file, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"device_id\",\"heartrate\",\"heartrate_zone\",\"local_date_time\",\"timestamp\" \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:00:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:01:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",67,\"outofrange\",\"2020-04-23 00:02:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:03:00\",0","title":"fitbitparsed_csv"},{"location":"datastreams/fitbitparsed-csv/#container","text":"The container should be a CSV file per sensor, each containing all participants\u2019 data. 
The script to connect and download data from this container is at: src/data/streams/fitbitparsed_csv/container.R","title":"Container"},{"location":"datastreams/fitbitparsed-csv/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_csv/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just keep in mind that some features will be empty if some of these columns are empty. FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Format"},{"location":"datastreams/fitbitparsed-mysql/","text":"fitbitparsed_mysql \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a MySQL database, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Container \u00b6 The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_mysql/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just keep in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"fitbitparsed_mysql"},{"location":"datastreams/fitbitparsed-mysql/#fitbitparsed_mysql","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a MySQL database, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream.","title":"fitbitparsed_mysql"},{"location":"datastreams/fitbitparsed-mysql/#container","text":"The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_mysql/container.R","title":"Container"},{"location":"datastreams/fitbitparsed-mysql/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just keep in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Format"},{"location":"datastreams/mandatory-empatica-format/","text":"Mandatory Empatica Format \u00b6 This is a description of the format RAPIDS needs to process data for the following Empatica sensors. EMPATICA_ACCELEROMETER RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration EMPATICA_HEARTRATE RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate EMPATICA_TEMPERATURE RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TEMPERATURE temperature EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device ELECTRODERMAL_ACTIVITY electrical conductance EMPATICA_BLOOD_VOLUME_PULSE RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device BLOOD_VOLUME_PULSE blood volume pulse EMPATICA_INTER_BEAT_INTERVAL RAPIDS column Description TIMESTAMP A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device INTER_BEAT_INTERVAL inter beat interval EMPATICA_TAGS RAPIDS column 
Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TAGS tags","title":"Mandatory Empatica Format"},{"location":"datastreams/mandatory-empatica-format/#mandatory-empatica-format","text":"This is a description of the format RAPIDS needs to process data for the following Empatica sensors. EMPATICA_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration EMPATICA_HEARTRATE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate EMPATICA_TEMPERATURE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TEMPERATURE temperature EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device ELECTRODERMAL_ACTIVITY electrical conductance EMPATICA_BLOOD_VOLUME_PULSE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device BLOOD_VOLUME_PULSE blood volume pulse EMPATICA_INTER_BEAT_INTERVAL RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device INTER_BEAT_INTERVAL inter beat interval EMPATICA_TAGS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when 
a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TAGS tags","title":"Mandatory Empatica Format"},{"location":"datastreams/mandatory-fitbit-format/","text":"Mandatory Fitbit Format \u00b6 This is a description of the format RAPIDS needs to process data for the following Fitbit sensors. FITBIT_HEARTRATE_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE_DAILY_RESTINGHR Daily resting heartrate HEARTRATE_DAILY_CALORIESOUTOFRANGE Calories spent while heartrate was outside a heartrate zone HEARTRATE_DAILY_CALORIESFATBURN Calories spent while heartrate was inside the fat burn zone HEARTRATE_DAILY_CALORIESCARDIO Calories spent while heartrate was inside the cardio zone HEARTRATE_DAILY_CALORIESPEAK Calories spent while heartrate was inside the peak zone FITBIT_HEARTRATE_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate HEARTRATE_ZONE Heartrate zone that HEARTRATE belongs to. 
It is based on the heartrate zone ranges of each device FITBIT_SLEEP_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day LOCAL_START_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the start of a daily sleep episode LOCAL_END_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the end of a daily sleep episode DEVICE_ID A string that uniquely identifies a device EFFICIENCY Sleep efficiency computed by fitbit as time asleep / (total time in bed - time to fall asleep) MINUTES_AFTER_WAKEUP Minutes the participant spent in bed after waking up MINUTES_ASLEEP Minutes the participant was asleep MINUTES_AWAKE Minutes the participant was awake MINUTES_TO_FALL_ASLEEP Minutes the participant spent in bed before falling asleep MINUTES_IN_BED Minutes the participant spent in bed across the sleep episode IS_MAIN_SLEEP 0 if this episode is a nap, or 1 if it is a main sleep episode TYPE stages or classic sleep data FITBIT_SLEEP_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day DEVICE_ID A string that uniquely identifies a device TYPE_EPISODE_ID An id for each unique main or nap episode. 
Main and nap episodes have different levels, each row in this table is one of such levels, so multiple rows can have the same TYPE_EPISODE_ID DURATION Duration of the episode level in minutes IS_MAIN_SLEEP 0 if this episode level belongs to a nap, or 1 if it belongs to a main sleep episode TYPE type of level: stages or classic sleep data LEVEL For stages levels one of wake , deep , light , or rem . For classic levels one of awake , restless , and asleep FITBIT_STEPS_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Daily step count FITBIT_STEPS_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Intraday step count (usually every minute)","title":"Mandatory Fitbit Format"},{"location":"datastreams/mandatory-fitbit-format/#mandatory-fitbit-format","text":"This is a description of the format RAPIDS needs to process data for the following Fitbit sensors. 
FITBIT_HEARTRATE_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE_DAILY_RESTINGHR Daily resting heartrate HEARTRATE_DAILY_CALORIESOUTOFRANGE Calories spent while heartrate was outside a heartrate zone HEARTRATE_DAILY_CALORIESFATBURN Calories spent while heartrate was inside the fat burn zone HEARTRATE_DAILY_CALORIESCARDIO Calories spent while heartrate was inside the cardio zone HEARTRATE_DAILY_CALORIESPEAK Calories spent while heartrate was inside the peak zone FITBIT_HEARTRATE_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate HEARTRATE_ZONE Heartrate zone that HEARTRATE belongs to. 
It is based on the heartrate zone ranges of each device FITBIT_SLEEP_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day LOCAL_START_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the start of a daily sleep episode LOCAL_END_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the end of a daily sleep episode DEVICE_ID A string that uniquely identifies a device EFFICIENCY Sleep efficiency computed by fitbit as time asleep / (total time in bed - time to fall asleep) MINUTES_AFTER_WAKEUP Minutes the participant spent in bed after waking up MINUTES_ASLEEP Minutes the participant was asleep MINUTES_AWAKE Minutes the participant was awake MINUTES_TO_FALL_ASLEEP Minutes the participant spent in bed before falling asleep MINUTES_IN_BED Minutes the participant spent in bed across the sleep episode IS_MAIN_SLEEP 0 if this episode is a nap, or 1 if it is a main sleep episode TYPE stages or classic sleep data FITBIT_SLEEP_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day DEVICE_ID A string that uniquely identifies a device TYPE_EPISODE_ID An id for each unique main or nap episode. 
Main and nap episodes have different levels, each row in this table is one of such levels, so multiple rows can have the same TYPE_EPISODE_ID DURATION Duration of the episode level in minutes IS_MAIN_SLEEP 0 if this episode level belongs to a nap, or 1 if it belongs to a main sleep episode TYPE type of level: stages or classic sleep data LEVEL For stages levels one of wake , deep , light , or rem . For classic levels one of awake , restless , and asleep FITBIT_STEPS_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Daily step count FITBIT_STEPS_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Intraday step count (usually every minute)","title":"Mandatory Fitbit Format"},{"location":"datastreams/mandatory-phone-format/","text":"Mandatory Phone Format \u00b6 This is a description of the format RAPIDS needs to process data for the following PHONE sensors. 
See examples in the CSV files inside rapids_example_csv.zip PHONE_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration PHONE_ACTIVITY_RECOGNITION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device ACTIVITY_NAME An string that denotes current activity name: in_vehicle , on_bicycle , on_foot , still , unknown , tilting , walking or running ACTIVITY_TYPE An integer (ranged from 0 to 8) that denotes current activity type CONFIDENCE An integer (ranged from 0 to 100) that denotes the prediction accuracy PHONE_APPLICATIONS_CRASHES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name APPLICATION_VERSION Application\u2019s version code ERROR_SHORT Short description of the error ERROR_LONG More verbose version of the error description ERROR_CONDITION 1 = code error; 2 = non-responsive (ANR error) IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_FOREGROUND RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_NOTIFICATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name TEXT 
Notification\u2019s header text, not the content SOUND Notification\u2019s sound source (if applicable) VIBRATE Notification\u2019s vibration pattern (if applicable) DEFAULTS If notification was delivered according to device\u2019s default settings FLAGS An integer that denotes Android notification flag PHONE_BATTERY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BATTERY_STATUS An integer that denotes battery status: 0 or 1 = unknown, 2 = charging, 3 = discharging, 4 = not charging, 5 = full BATTERY_LEVEL An integer that denotes battery level, between 0 and BATTERY_SCALE BATTERY_SCALE An integer that denotes the maximum battery level PHONE_BLUETOOTH RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BT_ADDRESS MAC address of the device\u2019s Bluetooth sensor BT_NAME User assigned name of the device\u2019s Bluetooth sensor BT_RSSI The RSSI dB to the scanned device PHONE_CALLS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device CALL_TYPE An integer that denotes call type: 1 = incoming, 2 = outgoing, 3 = missed CALL_DURATION Length of the call session TRACE SHA-1 one-way source/target of the call PHONE_CONVERSATION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_ENERGY A number that denotes the amplitude of an audio sample (L2-norm of the audio frame) INFERENCE An integer (ranged from 0 to 3) that denotes the type of an audio sample: 0 = silence, 1 = noise, 2 = voice, 3 = unknown DOUBLE_CONVO_START UNIX timestamp (13 digits) of the beginning of a conversation DOUBLE_CONVO_END UNIX timestamp (13 digits) of the end of a conversation PHONE_KEYBOARD RAPIDS column 
Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME The application\u2019s package name of keyboard interaction BEFORE_TEXT The previous keyboard input (empty if password) CURRENT_TEXT The current keyboard input (empty if password) IS_PASSWORD An integer: 0 = not password; 1 = password PHONE_LIGHT RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LIGHT_LUX The ambient luminance in lux units ACCURACY An integer that denotes the sensor\u2019s accuracy level: 3 = maximum accuracy, 2 = medium accuracy, 1 = low accuracy PHONE_LOCATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LATITUDE The location\u2019s latitude, in degrees DOUBLE_LONGITUDE The location\u2019s longitude, in degrees DOUBLE_BEARING The location\u2019s bearing, in degrees DOUBLE_SPEED The speed if available, in meters/second over ground DOUBLE_ALTITUDE The altitude if available, in meters above sea level PROVIDER A string that denotes the provider: gps , fused or network ACCURACY The estimated location accuracy PHONE_LOG RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device LOG_MESSAGE A string that denotes log message PHONE_MESSAGES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MESSAGE_TYPE An integer that denotes message type: 1 = received, 2 = sent TRACE SHA-1 one-way source/target of the message PHONE_SCREEN RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SCREEN_STATUS An integer that denotes 
screen status: 0 = off, 1 = on, 2 = locked, 3 = unlocked PHONE_WIFI_CONNECTED RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MAC_ADDRESS Device\u2019s MAC address SSID Currently connected access point network name BSSID Currently connected access point MAC address PHONE_WIFI_VISIBLE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SSID Detected access point network name BSSID Detected access point MAC address SECURITY Active security protocols FREQUENCY Wi-Fi band frequency (e.g., 2427, 5180), in Hz RSSI RSSI dB to the scanned device","title":"Mandatory Phone Format"},{"location":"datastreams/mandatory-phone-format/#mandatory-phone-format","text":"This is a description of the format RAPIDS needs to process data for the following PHONE sensors. See examples in the CSV files inside rapids_example_csv.zip PHONE_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration PHONE_ACTIVITY_RECOGNITION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device ACTIVITY_NAME An string that denotes current activity name: in_vehicle , on_bicycle , on_foot , still , unknown , tilting , walking or running ACTIVITY_TYPE An integer (ranged from 0 to 8) that denotes current activity type CONFIDENCE An integer (ranged from 0 to 100) that denotes the prediction accuracy PHONE_APPLICATIONS_CRASHES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package 
name APPLICATION_NAME Application\u2019s localized name APPLICATION_VERSION Application\u2019s version code ERROR_SHORT Short description of the error ERROR_LONG More verbose version of the error description ERROR_CONDITION 1 = code error; 2 = non-responsive (ANR error) IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_FOREGROUND RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_NOTIFICATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name TEXT Notification\u2019s header text, not the content SOUND Notification\u2019s sound source (if applicable) VIBRATE Notification\u2019s vibration pattern (if applicable) DEFAULTS If notification was delivered according to device\u2019s default settings FLAGS An integer that denotes Android notification flag PHONE_BATTERY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BATTERY_STATUS An integer that denotes battery status: 0 or 1 = unknown, 2 = charging, 3 = discharging, 4 = not charging, 5 = full BATTERY_LEVEL An integer that denotes battery level, between 0 and BATTERY_SCALE BATTERY_SCALE An integer that denotes the maximum battery level PHONE_BLUETOOTH RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BT_ADDRESS MAC address of the device\u2019s Bluetooth sensor BT_NAME User assigned name of the device\u2019s Bluetooth sensor BT_RSSI The RSSI dB to the 
scanned device PHONE_CALLS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device CALL_TYPE An integer that denotes call type: 1 = incoming, 2 = outgoing, 3 = missed CALL_DURATION Length of the call session TRACE SHA-1 one-way source/target of the call PHONE_CONVERSATION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_ENERGY A number that denotes the amplitude of an audio sample (L2-norm of the audio frame) INFERENCE An integer (ranged from 0 to 3) that denotes the type of an audio sample: 0 = silence, 1 = noise, 2 = voice, 3 = unknown DOUBLE_CONVO_START UNIX timestamp (13 digits) of the beginning of a conversation DOUBLE_CONVO_END UNIX timestamp (13 digits) of the end of a conversation PHONE_KEYBOARD RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME The application\u2019s package name of keyboard interaction BEFORE_TEXT The previous keyboard input (empty if password) CURRENT_TEXT The current keyboard input (empty if password) IS_PASSWORD An integer: 0 = not password; 1 = password PHONE_LIGHT RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LIGHT_LUX The ambient luminance in lux units ACCURACY An integer that denotes the sensor\u2019s accuracy level: 3 = maximum accuracy, 2 = medium accuracy, 1 = low accuracy PHONE_LOCATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LATITUDE The location\u2019s latitude, in degrees DOUBLE_LONGITUDE The location\u2019s longitude, in degrees DOUBLE_BEARING The location\u2019s bearing, in degrees DOUBLE_SPEED 
The speed if available, in meters/second over ground DOUBLE_ALTITUDE The altitude if available, in meters above sea level PROVIDER A string that denotes the provider: gps , fused or network ACCURACY The estimated location accuracy PHONE_LOG RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device LOG_MESSAGE A string that denotes log message PHONE_MESSAGES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MESSAGE_TYPE An integer that denotes message type: 1 = received, 2 = sent TRACE SHA-1 one-way source/target of the message PHONE_SCREEN RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SCREEN_STATUS An integer that denotes screen status: 0 = off, 1 = on, 2 = locked, 3 = unlocked PHONE_WIFI_CONNECTED RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MAC_ADDRESS Device\u2019s MAC address SSID Currently connected access point network name BSSID Currently connected access point MAC address PHONE_WIFI_VISIBLE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SSID Detected access point network name BSSID Detected access point MAC address SECURITY Active security protocols FREQUENCY Wi-Fi band frequency (e.g., 2427, 5180), in Hz RSSI RSSI dB to the scanned device","title":"Mandatory Phone Format"},{"location":"developers/documentation/","text":"Documentation \u00b6 We use mkdocs with the material theme to write these docs. Whenever you make any changes, just push them back to the repo and the documentation will be deployed automatically. 
Set up development environment \u00b6 Make sure your conda environment is active pip install mkdocs pip install mkdocs-material Preview \u00b6 Run the following command in RAPIDS root folder and go to http://127.0.0.1:8000 : mkdocs serve File Structure \u00b6 The documentation config file is /mkdocs.yml , if you are adding new .md files to the docs modify the nav attribute at the bottom of that file. You can use the hierarchy there to find all the files that appear in the documentation. Reference \u00b6 Check this page to get familiar with the different visual elements we can use in the docs (admonitions, code blocks, tables, etc.) You can also refer to /docs/setup/installation.md and /docs/setup/configuration.md to see practical examples of these elements. Hint Any links to internal pages should be relative to the current page. For example, any link from this page (documentation) which is inside ./developers should begin with ../ to go one folder level up like: [ mylink ]( ../setup/installation.md ) Extras \u00b6 You can insert emojis using this syntax :[SOURCE]-[ICON_NAME] from the following sources: https://materialdesignicons.com/ https://fontawesome.com/icons/tasks?style=solid https://primer.style/octicons/ You can use this page to create markdown tables more easily","title":"Documentation"},{"location":"developers/documentation/#documentation","text":"We use mkdocs with the material theme to write these docs. 
Whenever you make any changes, just push them back to the repo and the documentation will be deployed automatically.","title":"Documentation"},{"location":"developers/documentation/#set-up-development-environment","text":"Make sure your conda environment is active pip install mkdocs pip install mkdocs-material","title":"Set up development environment"},{"location":"developers/documentation/#preview","text":"Run the following command in RAPIDS root folder and go to http://127.0.0.1:8000 : mkdocs serve","title":"Preview"},{"location":"developers/documentation/#file-structure","text":"The documentation config file is /mkdocs.yml , if you are adding new .md files to the docs modify the nav attribute at the bottom of that file. You can use the hierarchy there to find all the files that appear in the documentation.","title":"File Structure"},{"location":"developers/documentation/#reference","text":"Check this page to get familiar with the different visual elements we can use in the docs (admonitions, code blocks, tables, etc.) You can also refer to /docs/setup/installation.md and /docs/setup/configuration.md to see practical examples of these elements. Hint Any links to internal pages should be relative to the current page. 
For example, any link from this page (documentation) which is inside ./developers should begin with ../ to go one folder level up like: [ mylink ]( ../setup/installation.md )","title":"Reference"},{"location":"developers/documentation/#extras","text":"You can insert emojis using this syntax :[SOURCE]-[ICON_NAME] from the following sources: https://materialdesignicons.com/ https://fontawesome.com/icons/tasks?style=solid https://primer.style/octicons/ You can use this page to create markdown tables more easily","title":"Extras"},{"location":"developers/git-flow/","text":"Git Flow \u00b6 We use the develop/master variation of the OneFlow git flow Add New Features \u00b6 We use feature (topic) branches to implement new features Internal Developer You are an internal developer if you have writing permissions to the repository. Most feature branches are never pushed to the repo, only do so if you expect that its development will take days (to avoid losing your work if your computer is damaged). Otherwise follow the following instructions to locally rebase your feature branch into develop and push those rebased changes online. Starting your feature branch Pull the latest develop git checkout develop git pull Create your feature branch git checkout -b feature/feature1 Add, modify or delete the necessary files to add your new feature Update the change log ( docs/change-log.md ) Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. 
Fetch the latest changes to develop git fetch origin develop Rebase your feature branch git checkout feature/feature1 git rebase -i develop Integrate your new feature to develop git checkout develop git merge --no-ff feature/feature1 # (use the default merge message) git push origin develop git branch -d feature/feature1 External Developer You are an external developer if you do NOT have writing permissions to the repository. Starting your feature branch Fork and clone our repository on Github Switch to the latest develop git checkout develop Create your feature branch git checkout -b feature/external-test Add, modify or delete the necessary files to add your new feature Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented, it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. 
Add our repo as another remote git remote add upstream https://github.com/carissalow/rapids/ Fetch the latest changes to develop git fetch upstream develop Rebase your feature branch git checkout feature/external-test git rebase -i develop Push your feature branch online git push --set-upstream origin feature/external-test Open a pull request to the develop branch using Github\u2019s GUI Release a New Version \u00b6 Pull the latest develop git checkout develop git pull Create a new release branch git describe --abbrev = 0 --tags # Bump the release (0.1.0 to 0.2.0 => NEW_RELEASE) git checkout -b release/v [ NEW_RELEASE ] develop Add new tag git tag v [ NEW_RELEASE ] Merge and push the release branch git checkout develop git merge release/v [ NEW_RELEASE ] git push --tags origin develop git branch -d release/v [ NEW_RELEASE ] Fast-forward master git checkout master git merge --ff-only develop git push Go to GitHub and create a new release based on the newest tag v[NEW_RELEASE] (remember to add the change log) Release a Hotfix \u00b6 Pull the latest master git checkout master git pull Start a hotfix branch git describe --abbrev = 0 --tags # Bump the hotfix (0.1.0 to 0.1.1 => NEW_HOTFIX) git checkout -b hotfix/v [ NEW_HOTFIX ] master Fix whatever needs to be fixed Update the change log Tag and merge the hotfix git tag v [ NEW_HOTFIX ] git checkout develop git merge hotfix/v [ NEW_HOTFIX ] git push --tags origin develop git branch -d hotfix/v [ NEW_HOTFIX ] Fast-forward master git checkout master git merge --ff-only v[NEW_HOTFIX] git push Go to GitHub and create a new release based on the newest tag v[NEW_HOTFIX] (remember to add the change log)","title":"Git Flow"},{"location":"developers/git-flow/#git-flow","text":"We use the develop/master variation of the OneFlow git flow","title":"Git Flow"},{"location":"developers/git-flow/#add-new-features","text":"We use feature (topic) branches to implement new features Internal Developer You are an internal developer if you 
have writing permissions to the repository. Most feature branches are never pushed to the repo, only do so if you expect that its development will take days (to avoid losing your work if your computer is damaged). Otherwise follow the following instructions to locally rebase your feature branch into develop and push those rebased changes online. Starting your feature branch Pull the latest develop git checkout develop git pull Create your feature branch git checkout -b feature/feature1 Add, modify or delete the necessary files to add your new feature Update the change log ( docs/change-log.md ) Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. Fetch the latest changes to develop git fetch origin develop Rebase your feature branch git checkout feature/feature1 git rebase -i develop Integrate your new feature to develop git checkout develop git merge --no-ff feature/feature1 # (use the default merge message) git push origin develop git branch -d feature/feature1 External Developer You are an external developer if you do NOT have writing permissions to the repository. 
Starting your feature branch Fork and clone our repository on GitHub Switch to the latest develop git checkout develop Create your feature branch git checkout -b feature/external-test Add, modify or delete the necessary files to add your new feature Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented, it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. Add our repo as another remote git remote add upstream https://github.com/carissalow/rapids/ Fetch the latest changes to develop git fetch upstream develop Rebase your feature branch git checkout feature/external-test git rebase -i develop Push your feature branch online git push --set-upstream origin feature/external-test Open a pull request to the develop branch using GitHub\u2019s GUI","title":"Add New Features"},{"location":"developers/git-flow/#release-a-new-version","text":"Pull the latest develop git checkout develop git pull Create a new release branch git describe --abbrev = 0 --tags # Bump the release (0.1.0 to 0.2.0 => NEW_RELEASE) git checkout -b release/v [ NEW_RELEASE ] develop Add new tag git tag v [ NEW_RELEASE ] Merge and push the release branch git checkout develop git merge release/v [ NEW_RELEASE ] git push --tags origin develop git branch -d release/v [ NEW_RELEASE ] Fast-forward master git checkout master git merge --ff-only develop git push Go to GitHub and create a new release based on the newest tag v[NEW_RELEASE] (remember to add the change log)","title":"Release a New Version"},{"location":"developers/git-flow/#release-a-hotfix","text":"Pull the latest master git checkout master git pull Start a hotfix branch git describe --abbrev = 0 --tags # Bump the hotfix (0.1.0 to 0.1.1 => NEW_HOTFIX) git checkout -b hotfix/v [ NEW_HOTFIX ] master 
Fix whatever needs to be fixed Update the change log Tag and merge the hotfix git tag v [ NEW_HOTFIX ] git checkout develop git merge hotfix/v [ NEW_HOTFIX ] git push --tags origin develop git branch -d hotfix/v [ NEW_HOTFIX ] Fast-forward master git checkout master git merge --ff-only v[NEW_HOTFIX] git push Go to GitHub and create a new release based on the newest tag v[NEW_HOTFIX] (remember to add the change log)","title":"Release a Hotfix"},{"location":"developers/remote-support/","text":"Remote Support \u00b6 We use the Live Share extension of Visual Studio Code to debug issues when sharing data or database credentials is not possible. Install Visual Studio Code Open your RAPIDS root folder in a new VSCode window Open a new Terminal Terminal > New terminal Install the Live Share extension pack Press Ctrl + P or Cmd + P and run this command: >live share: start collaboration session 6. Follow the instructions and share the session link you receive","title":"Remote Support"},{"location":"developers/remote-support/#remote-support","text":"We use the Live Share extension of Visual Studio Code to debug issues when sharing data or database credentials is not possible. Install Visual Studio Code Open your RAPIDS root folder in a new VSCode window Open a new Terminal Terminal > New terminal Install the Live Share extension pack Press Ctrl + P or Cmd + P and run this command: >live share: start collaboration session 6. Follow the instructions and share the session link you receive","title":"Remote Support"},{"location":"developers/test-cases/","text":"Test Cases \u00b6 Along with the continued development and the addition of new sensors and features to the RAPIDS pipeline, tests for the currently available sensors and features are being implemented. Since this is a Work In Progress this page will be updated with the list of sensors and features for which testing is available. For each of the sensors listed a description of the data used for testing (test cases) is outlined. 
Currently for all intent and testing purposes the tests/data/raw/test01/ contains all the test data files for testing android data formats and tests/data/raw/test02/ contains all the test data files for testing iOS data formats. It follows that the expected (verified output) are contained in the tests/data/processed/test01/ and tests/data/processed/test02/ for Android and iOS respectively. tests/data/raw/test03/ and tests/data/raw/test04/ contain data files for testing empty raw data files for android and iOS respectively. The following is a list of the sensors for which testing is currently available. Sensor Provider Periodic Frequency Event Phone Accelerometer Panda N N N Phone Accelerometer RAPIDS N N N Phone Activity Recognition RAPIDS N N N Phone Applications Foreground RAPIDS N N N Phone Battery RAPIDS Y Y N Phone Bluetooth Doryab N N N Phone Bluetooth RAPIDS Y Y Y Phone Calls RAPIDS Y Y N Phone Conversation RAPIDS Y Y N Phone Data Yield RAPIDS N N N Phone Light RAPIDS Y Y N Phone Locations Doryab N N N Phone Locations Barnett N N N Phone Messages RAPIDS Y Y N Phone Screen RAPIDS Y N N Phone WiFi Connected RAPIDS Y Y N Phone WiFi Visible RAPIDS Y Y N Fitbit Data Yield RAPIDS N N N Fitbit Heart Rate Summary RAPIDS N N N Fitbit Heart Rate Intraday RAPIDS N N N Fitbit Sleep Summary RAPIDS N N N Fitbit Steps Summary RAPIDS N N N Fitbit Steps Intraday RAPIDS N N N Messages (SMS) \u00b6 The raw message data file contains data for 2 separate days. The data for the first day contains 5 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The raw message data contains records for both message_types (i.e. received and sent ) in both days in all epochs. The number of records with each message_types per epoch is randomly distributed There is at least one record with each message_types per epoch. There is one raw message data file each, as described above, for testing both iOS and Android data. 
There is also an additional empty data file for both android and iOS for testing empty data files Calls \u00b6 Due to the difference in the format of the raw call data for iOS and Android the following is the expected results the calls_with_datetime_unified.csv . This would give a better idea of the use cases being tested since the calls_with_datetime_unified.csv would make both the iOS and Android data comparable. The call data would contain data for 2 days. The data for the first day contains 6 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The call data contains records for all call_types (i.e. incoming , outgoing and missed ) in both days in all epochs. The number of records with each of the call_types per epoch is randomly distributed. There is at least one record with each call_types per epoch. There is one call data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files Screen \u00b6 Due to the difference in the format of the raw screen data for iOS and Android the following is the expected results the screen_deltas.csv . This would give a better idea of the use cases being tested since the screen_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the screen sensor. The screen delta data file contains data for 1 day. The screen delta data contains 1 record to represent an unlock episode that falls within an epoch for every epoch . The screen delta data contains 1 record to represent an unlock episode that falls across the boundary of 2 epochs. Namely the unlock episode starts in one epoch and ends in the next, thus there is a record for unlock episodes that fall across night to morning , morning to afternoon and finally afternoon to night The testing is done for unlock episode_type. 
There is one screen data file each for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files Battery \u00b6 Due to the difference in the format of the raw battery data for iOS and Android as well as versions of iOS the following is the expected results the battery_deltas.csv . This would give a better idea of the use cases being tested since the battery_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the battery sensor. The battery delta data file contains data for 1 day. The battery delta data contains 1 record each for a charging and discharging episode that falls within an epoch for every epoch . Thus, for the daily epoch there would be multiple charging and discharging episodes Since either a charging episode or a discharging episode and not both can occur across epochs, in order to test episodes that occur across epochs alternating episodes of charging and discharging episodes that fall across night to morning , morning to afternoon and finally afternoon to night are present in the battery delta data. This starts with a discharging episode that begins in night and end in morning . There is one battery data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files Bluetooth \u00b6 The raw Bluetooth data file contains data for 1 day. The raw Bluetooth data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) An option of 5 Bluetooth devices are randomly distributed throughout the data records. 
There is one raw Bluetooth data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files. WIFI \u00b6 There are 2 data files ( wifi_raw.csv and sensor_wifi_raw.csv ) for each fake participant for each phone platform. The raw WIFI data files contain data for 1 day. The sensor_wifi_raw.csv data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) The wifi_raw.csv data contains 3 records with random timestamps for each epoch to represent visible broadcasting WIFI network. This file is empty for the iOS phone testing data. An option of 10 access point devices is randomly distributed throughout the data records. 5 each for sensor_wifi_raw.csv and wifi_raw.csv . There data files for testing both iOS and Android data formats. There are also additional empty data files for both android and iOS for testing empty data files. Light \u00b6 The raw light data file contains data for 1 day. The raw light data contains 3 or 4 rows of data for each epoch except night . The single row of data for night is for testing features for single values inputs. (Example testing the standard deviation of one input value) Since light is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files. Application Foreground \u00b6 The raw application foreground data file contains data for 1 day. The raw application foreground data contains 7 - 9 rows of data for each epoch . 
The records for each epoch contains apps that are randomly selected from a list of apps that are from the MULTIPLE_CATEGORIES and SINGLE_CATEGORIES (See testing_config.yaml ). There are also records in each epoch that have apps randomly selected from a list of apps that are from the EXCLUDED_CATEGORIES and EXCLUDED_APPS . This is to test that these apps are actually being excluded from the calculations of features. There are also records to test SINGLE_APPS calculations. Since application foreground is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files. Activity Recognition \u00b6 The raw Activity Recognition data file contains data for 1 day. The raw Activity Recognition data each epoch period contains rows that records 2 - 5 different activity_types . The is such that durations of activities can be tested. Additionally, there are records that mimic the duration of an activity over the time boundary of neighboring epochs. (For example, there a set of records that mimic the participant in_vehicle from afternoon into evening ) There is one file each with raw Activity Recognition data for testing both iOS and Android data formats. (plugin_google_activity_recognition_raw.csv for android and plugin_ios_activity_recognition_raw.csv for iOS) There is also an additional empty data file for both android and iOS for testing empty data files. Conversation \u00b6 The raw conversation data file contains data for 2 day. The raw conversation data contains records with a sample of both datatypes (i.e. voice/noise = 0 , and conversation = 2 ) as well as rows with for samples of each of the inference values (i.e. silence = 0 , noise = 1 , voice = 2 , and unknown = 3 ) for each epoch . The different datatype and inference records are randomly distributed throughout the epoch . 
Additionally there are 2 - 5 records for conversations ( datatype = 2, and inference = -1) in each epoch and for each epoch except night, there is a conversation record that has a double_convo_start timestamp that is from the previous epoch . This is to test the calculations of features across epochs . There is a raw conversation data file for both android and iOS platforms ( plugin_studentlife_audio_android_raw.csv and plugin_studentlife_audio_raw.csv respectively). Finally, there are also additional empty data files for both android and iOS for testing empty data files","title":"Test cases"},{"location":"developers/test-cases/#test-cases","text":"Along with the continued development and the addition of new sensors and features to the RAPIDS pipeline, tests for the currently available sensors and features are being implemented. Since this is a Work In Progress this page will be updated with the list of sensors and features for which testing is available. For each of the sensors listed a description of the data used for testing (test cases) are outline. Currently for all intent and testing purposes the tests/data/raw/test01/ contains all the test data files for testing android data formats and tests/data/raw/test02/ contains all the test data files for testing iOS data formats. It follows that the expected (verified output) are contained in the tests/data/processed/test01/ and tests/data/processed/test02/ for Android and iOS respectively. tests/data/raw/test03/ and tests/data/raw/test04/ contain data files for testing empty raw data files for android and iOS respectively. The following is a list of the sensors that testing is currently available. 
Sensor Provider Periodic Frequency Event Phone Accelerometer Panda N N N Phone Accelerometer RAPIDS N N N Phone Activity Recognition RAPIDS N N N Phone Applications Foreground RAPIDS N N N Phone Battery RAPIDS Y Y N Phone Bluetooth Doryab N N N Phone Bluetooth RAPIDS Y Y Y Phone Calls RAPIDS Y Y N Phone Conversation RAPIDS Y Y N Phone Data Yield RAPIDS N N N Phone Light RAPIDS Y Y N Phone Locations Doryab N N N Phone Locations Barnett N N N Phone Messages RAPIDS Y Y N Phone Screen RAPIDS Y N N Phone WiFi Connected RAPIDS Y Y N Phone WiFi Visible RAPIDS Y Y N Fitbit Data Yield RAPIDS N N N Fitbit Heart Rate Summary RAPIDS N N N Fitbit Heart Rate Intraday RAPIDS N N N Fitbit Sleep Summary RAPIDS N N N Fitbit Steps Summary RAPIDS N N N Fitbit Steps Intraday RAPIDS N N N","title":"Test Cases"},{"location":"developers/test-cases/#messages-sms","text":"The raw message data file contains data for 2 separate days. The data for the first day contains 5 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The raw message data contains records for both message_types (i.e. received and sent ) in both days in all epochs. The number of records with each message_types per epoch is randomly distributed There is at least one record with each message_types per epoch. There is one raw message data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Messages (SMS)"},{"location":"developers/test-cases/#calls","text":"Due to the difference in the format of the raw call data for iOS and Android the following is the expected results the calls_with_datetime_unified.csv . This would give a better idea of the use cases being tested since the calls_with_datetime_unified.csv would make both the iOS and Android data comparable. The call data would contain data for 2 days. 
The data for the first day contains 6 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The call data contains records for all call_types (i.e. incoming , outgoing and missed ) in both days in all epochs. The number of records with each of the call_types per epoch is randomly distributed. There is at least one record with each call_types per epoch. There is one call data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Calls"},{"location":"developers/test-cases/#screen","text":"Due to the difference in the format of the raw screen data for iOS and Android the following is the expected results the screen_deltas.csv . This would give a better idea of the use cases being tested since the screen_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the screen sensor. The screen delta data file contains data for 1 day. The screen delta data contains 1 record to represent an unlock episode that falls within an epoch for every epoch . The screen delta data contains 1 record to represent an unlock episode that falls across the boundary of 2 epochs. Namely the unlock episode starts in one epoch and ends in the next, thus there is a record for unlock episodes that fall across night to morning , morning to afternoon and finally afternoon to night The testing is done for unlock episode_type. There is one screen data file each for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Screen"},{"location":"developers/test-cases/#battery","text":"Due to the difference in the format of the raw battery data for iOS and Android as well as versions of iOS the following is the expected results the battery_deltas.csv . 
This would give a better idea of the use cases being tested since the battery_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the battery sensor. The battery delta data file contains data for 1 day. The battery delta data contains 1 record each for a charging and discharging episode that falls within an epoch for every epoch . Thus, for the daily epoch there would be multiple charging and discharging episodes Since either a charging episode or a discharging episode and not both can occur across epochs, in order to test episodes that occur across epochs alternating episodes of charging and discharging episodes that fall across night to morning , morning to afternoon and finally afternoon to night are present in the battery delta data. This starts with a discharging episode that begins in night and end in morning . There is one battery data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Battery"},{"location":"developers/test-cases/#bluetooth","text":"The raw Bluetooth data file contains data for 1 day. The raw Bluetooth data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) An option of 5 Bluetooth devices are randomly distributed throughout the data records. There is one raw Bluetooth data file each, for testing both iOS and Android data formats. 
There is also an additional empty data file for both android and iOS for testing empty data files.","title":"Bluetooth"},{"location":"developers/test-cases/#wifi","text":"There are 2 data files ( wifi_raw.csv and sensor_wifi_raw.csv ) for each fake participant for each phone platform. The raw WIFI data files contain data for 1 day. The sensor_wifi_raw.csv data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) The wifi_raw.csv data contains 3 records with random timestamps for each epoch to represent visible broadcasting WIFI network. This file is empty for the iOS phone testing data. An option of 10 access point devices is randomly distributed throughout the data records. 5 each for sensor_wifi_raw.csv and wifi_raw.csv . There data files for testing both iOS and Android data formats. There are also additional empty data files for both android and iOS for testing empty data files.","title":"WIFI"},{"location":"developers/test-cases/#light","text":"The raw light data file contains data for 1 day. The raw light data contains 3 or 4 rows of data for each epoch except night . The single row of data for night is for testing features for single values inputs. (Example testing the standard deviation of one input value) Since light is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files.","title":"Light"},{"location":"developers/test-cases/#application-foreground","text":"The raw application foreground data file contains data for 1 day. The raw application foreground data contains 7 - 9 rows of data for each epoch . 
The records for each epoch contains apps that are randomly selected from a list of apps that are from the MULTIPLE_CATEGORIES and SINGLE_CATEGORIES (See testing_config.yaml ). There are also records in each epoch that have apps randomly selected from a list of apps that are from the EXCLUDED_CATEGORIES and EXCLUDED_APPS . This is to test that these apps are actually being excluded from the calculations of features. There are also records to test SINGLE_APPS calculations. Since application foreground is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files.","title":"Application Foreground"},{"location":"developers/test-cases/#activity-recognition","text":"The raw Activity Recognition data file contains data for 1 day. The raw Activity Recognition data each epoch period contains rows that records 2 - 5 different activity_types . The is such that durations of activities can be tested. Additionally, there are records that mimic the duration of an activity over the time boundary of neighboring epochs. (For example, there a set of records that mimic the participant in_vehicle from afternoon into evening ) There is one file each with raw Activity Recognition data for testing both iOS and Android data formats. (plugin_google_activity_recognition_raw.csv for android and plugin_ios_activity_recognition_raw.csv for iOS) There is also an additional empty data file for both android and iOS for testing empty data files.","title":"Activity Recognition"},{"location":"developers/test-cases/#conversation","text":"The raw conversation data file contains data for 2 day. The raw conversation data contains records with a sample of both datatypes (i.e. voice/noise = 0 , and conversation = 2 ) as well as rows with for samples of each of the inference values (i.e. silence = 0 , noise = 1 , voice = 2 , and unknown = 3 ) for each epoch . 
The different datatype and inference records are randomly distributed throughout the epoch . Additionally there are 2 - 5 records for conversations ( datatype = 2, and inference = -1) in each epoch and for each epoch except night, there is a conversation record that has a double_convo_start timestamp that is from the previous epoch . This is to test the calculations of features across epochs . There is a raw conversation data file for both android and iOS platforms ( plugin_studentlife_audio_android_raw.csv and plugin_studentlife_audio_raw.csv respectively). Finally, there are also additional empty data files for both android and iOS for testing empty data files","title":"Conversation"},{"location":"developers/testing/","text":"Testing \u00b6 The following is a simple guide to run RAPIDS\u2019 tests. All files necessary for testing are stored in the ./tests/ directory Steps for Testing \u00b6 Add raw data. Add the raw data to the corresponding sensor CSV file in tests/data/external/aware_csv . Create the CSV if it does not exist. Link raw data. Make sure that you link the new raw data to a participant by using the same device_id in the data and in [DEVICE_IDS] inside their participant file ( tests/data/external/participant_files/testXX.yaml ). Create the participant file if it does not exist, and don\u2019t forget to edit [PIDS] in the config file of the time segments you are testing (see below). For simplicity, we use a participant\u2019s id ( testXX ) as their device_id . Edit the config file. Activate the sensor provider you are testing if it isn\u2019t already. Set [SENSOR][PROVIDER][COMPUTE] to TRUE in the config.yaml of the time segments you are testing: - tests/settings/frequency_config.yaml # For frequency time segments - tests/settings/periodic_config.yaml # For periodic time segments # We have not tested events time segments yet Run the pipeline and tests. 
You can run all time segments pipelines and their tests tests/scripts/run_tests.sh -t all You can run only the pipeline of a specific time segment and its tests tests/scripts/run_tests.sh -t frequency -a both Or, if you are working on your tests and you want to run a pipeline and its tests independently tests/scripts/run_tests.sh -t frequency -a run tests/scripts/run_tests.sh -t frequency -a test Output example \u00b6 The following is a snippet of the output you should see after running your test. test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... frequency ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... frequency FAIL The results above show that the for periodic both test_sensors_files_exist and test_sensors_features_calculations passed while for frequency first test test_sensors_files_exist passed while test_sensors_features_calculations failed. Additionally, you should get the traceback of the failure (not shown here). For more information on how to implement test scripts and use unittest please see Unittest Documentation Testing of the RAPIDS sensors and features is a work-in-progress. Please see Test Cases for a list of sensors and features that have testing currently available. How do we execute the tests? \u00b6 This bash script tests/scripts/run_tests.sh executes one or all pipelines for different time segment types ( frequency , periodic , and events ) as well as their tests (see below). This python script tests/scripts/run_tests.py runs the tests. It parses the involved participants and active sensor providers in the config.yaml file of the time segment type being tested. We test that the output file we expect exists and that its content matches the expected values. 
Example of raw data for PHONE_APPLICATIONS_FOREGROUND testing 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 \"timestamp\" , \"device_id\" , \"package_name\" , \"application_name\" , \"is_system_app\" 1593946320761 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593961974942 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593958144033 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593947228964 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593951572326 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593950554868 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593964799620 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593974241305 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593969483540 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593977289581 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593970763367 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593964867720 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593974942995 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593986399351 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1594000139073 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593994717099 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593985854872 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1594003154390 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1594003853415 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593991680045 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 
0 1594007999202 , \"test01\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1593939733998 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593933324739 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593925161482 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593936918763 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593924155524 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1593922625358 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593943199317 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593951550550 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593981544544 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593999779779 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593933565565 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475200000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602475200000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602475999000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475999000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602477000000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602477000000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602477000000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478000000 , \"test05\" , 
\"com.facebook.moments\" , \"Facebook Moments\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602480500000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube 
Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559800000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602559800000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602561599000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602561599000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563400000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563500000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 What cases do we test? \u00b6 The sample data includes 7 tests cases. Take phone battery as an example, on this platform, battery status 2 represents charging and battery status 4 represents discharge . 1. 
A daily segment instance with no battery episodes Example Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 00:08:10.415 per_ios 4 80 100 4170 23 0 2 Li-ion 00:17:38.602 per_ios 4 77 100 4157 23 0 2 Li-ion 03:20:30.415 per_ios 2 77 100 4170 23 0 2 Li-ion 03:30:35.875 per_ios 2 80 100 4157 23 0 2 Li-ion Output results local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 00:00:00,00:29:59 thirtyminutes0000 2020-07-01 00:00:00 2020-07-01 00:29:59 1 21.8259833333333 0.137450851775292 0.137450851775292 0 0 03:00:00,03:29:59 thirtyminutes0006 2020-07-01 03:00:00 2020-07-01 03:29:59 0 0 0 0 1 9.49288333333333 Since there is no battery episode between 00:30:00 and 03:00:00, no result will be generated for this epoch. 2. 
A daily segment instance with two battery episodes (one charging, one discharge) Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 17:59:41.434 per_ios 4 59 100 4094 23 0 2 Li-ion 18:04:14.321 per_ios 4 58 100 4157 23 0 2 Li-ion 18:07:24.456 per_ios 4 57 100 4157 23 0 2 Li-ion 20:03:03.415 per_ios 2 72 100 4170 23 0 2 Li-ion 20:05:12.434 per_ios 2 73 100 4094 23 0 2 Li-ion 20:07:24.678 per_ios 2 74 100 4157 23 0 2 Li-ion 20:10:34.875 per_ios 2 75 100 4157 23 0 2 Li-ion 21:30:04.415 per_ios 4 74 100 4170 23 0 2 Li-ion 21:32:14.434 per_ios 4 73 100 4094 23 0 2 Li-ion 21:35:23.678 per_ios 4 72 100 4157 23 0 2 Li-ion 21:37:47.875 per_ios 4 71 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 18:00:00,23:59:59 evening 2020-07-01 18:00:00 2020-07-01 23:59:59 2 75.1306166666666 0.0664958369201784 0.079525673538274 1 37.5236666666667 Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 20:10:34.875 fre_ios 2 75 100 4157 23 0 2 Li-ion 20:20:17.171 fre_ios 4 74 100 4170 23 0 2 Li-ion Output results local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 20:00:00,20:29:59 thirtyminutes0040 2020-07-01 20:00:00 2020-07-01 20:29:59 1 
14.6351666666667 0.0683285693136395 0.0683285693136395 1 12.3074 3. A daily segment instance with a charging episode that spans to the next daily instance Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 11:59:28.434 per_ios 2 63 100 4094 23 0 2 Li-ion 12:04:37.678 per_ios 2 64 100 4157 23 0 2 Li-ion Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 11:59:28.434 fre_ios 2 63 100 4094 23 0 2 Li-ion 12:04:37.678 fre_ios 2 64 100 4157 23 0 2 Li-ion 4. A daily segment instance with a discharge episode that spans to the next daily instance Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 05:59:49.434 per_ios 4 79 100 4094 23 0 2 Li-ion 06:02:19.321 per_ios 4 78 100 4157 23 0 2 Li-ion Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 17:59:41.434 fre_ios 4 59 100 4094 23 0 2 Li-ion 18:04:14.321 fre_ios 4 58 100 4157 23 0 2 Li-ion 5. 
Three-day segments that repeat everyday Time segment tested: label start_time length repeats_on repeats_value daily 00:00:00 23H 59M 59S every_day 0 Data tested: We test 14 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-07-02 00:03:47.875 per_and 3 63 100 4157 23 0 2 Li-ion 2020-07-02 00:05:47.875 per_and 3 62 100 4157 23 0 2 Li-ion 2020-07-02 23:55:47.875 per_and 3 55 100 4157 23 0 2 Li-ion 2020-07-02 23:59:47.875 per_and 3 54 100 4157 23 0 2 Li-ion 2020-07-03 00:06:47.875 per_and 3 53 100 4157 23 0 2 Li-ion 2020-07-03 00:09:47.875 per_and 3 52 100 4157 23 0 2 Li-ion 2020-07-03 23:47:05.000 per_and 3 60 100 4157 23 0 2 Li-ion 2020-07-03 23:55:05.000 per_and 3 59 100 4157 23 0 2 Li-ion 2020-07-04 00:15:05.000 per_and 3 58 100 4157 23 0 2 Li-ion 2020-07-04 00:18:05.000 per_and 3 57 100 4157 23 0 2 Li-ion 2020-07-04 23:51:00.000 per_and 3 41 100 4157 23 0 2 Li-ion 2020-07-04 23:57:00.000 per_and 3 40 100 4157 23 0 2 Li-ion 2020-07-05 00:21:00.000 per_and 3 39 100 4157 23 0 2 Li-ion 2020-07-05 00:23:00.000 per_and 3 38 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge threeday#2020-07-02 00:00:00,2020-07-04 23:59:59 threeday 2020-07-02 00:00:00 2020-07-04 23:59:59 4 149.7954 0.0710868450815781 0.111113168762384 0 0 threeday#2020-07-03 00:00:00,2020-07-05 23:59:59 threeday 2020-07-03 00:00:00 2020-07-05 23:59:59 3 162.7952 0.0492745931499224 0.0502547286558745 0 0 threeday#2020-07-04 00:00:00,2020-07-06 23:59:59 threeday 2020-07-04 00:00:00 2020-07-06 23:59:59 2 110.0815 
0.0449915246814979 0.0483879032392475 0 0 threeday#2020-07-05 00:00:00,2020-07-07 23:59:59 threeday 2020-07-05 00:00:00 2020-07-07 23:59:59 1 52.9991166666667 0.0377364779979038 0.0377364779979038 0 0 6. A three-day segment that repeats on a fixed day Time segment tested: label start_time length repeats_on repeats_value weekends 00:00:00 2D 23H 59M 59S wday 5 Data tested: We test 10 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-07-03 00:06:47.875 per_and 3 53 100 4157 23 0 2 Li-ion 2020-07-03 00:09:47.875 per_and 3 52 100 4157 23 0 2 Li-ion 2020-07-03 23:47:05.000 per_and 3 60 100 4157 23 0 2 Li-ion 2020-07-03 23:55:05.000 per_and 3 59 100 4157 23 0 2 Li-ion 2020-07-04 00:15:05.000 per_and 3 58 100 4157 23 0 2 Li-ion 2020-07-04 00:18:05.000 per_and 3 57 100 4157 23 0 2 Li-ion 2020-07-04 23:51:00.000 per_and 3 41 100 4157 23 0 2 Li-ion 2020-07-04 23:57:00.000 per_and 3 40 100 4157 23 0 2 Li-ion 2020-07-05 00:21:00.000 per_and 3 39 100 4157 23 0 2 Li-ion 2020-07-05 00:23:00.000 per_and 3 38 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge weekends#2020-07-03 00:00:00,2020-07-05 23:59:59 weekends 2020-07-03 00:00:00 2020-07-05 23:59:59 3 162.7952 0.0492745931499224 0.0502547286558745 0 0 7. 
Event segments Time segments tested: label event_timestamp length shift shift_direction device_id survey1 1587661220000 10H 10H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2 1587661220000 10H 5H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey3 1587661220000 10H 0H 1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 Data tested: We test 7 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-04-23 03:15:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 90 100 4157 23 0 2 Li-ion 2020-04-23 03:21:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 89 100 4157 23 0 2 Li-ion 2020-04-23 07:50:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 80 100 4157 23 0 2 Li-ion 2020-04-23 08:05:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 79 100 4157 23 0 2 Li-ion 2020-04-23 08:12:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 78 100 4157 23 0 2 Li-ion 2020-04-23 22:50:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 50 100 4157 23 0 2 Li-ion 2020-04-23 22:53:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 49 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_sumdurationcharge phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_maxconsumptionrate phone_battery_rapids_avgconsumptionrate phone_battery_rapids_countcharge survey1#2020-04-23 03:00:20,2020-04-23 13:00:20 survey1 2020-04-23 03:00:20 2020-04-23 13:00:20 0 2 87.9985333333333 0.0384621794978634 0.0331202101231602 0 survey2#2020-04-23 08:00:20,2020-04-23 18:00:20 survey2 2020-04-23 08:00:20 2020-04-23 18:00:20 0 1 41.6659833333333 0.0480007872129103 0.0480007872129103 0 survey3#2020-04-23 13:00:20,2020-04-23 23:00:20 survey3 2020-04-23 13:00:20 2020-04-23 23:00:20 0 1 10.3498 0.0966202245454018 0.0966202245454018 
0","title":"Testing"},{"location":"developers/testing/#testing","text":"The following is a simple guide to run RAPIDS\u2019 tests. All files necessary for testing are stored in the ./tests/ directory","title":"Testing"},{"location":"developers/testing/#steps-for-testing","text":"Add raw data. Add the raw data to the corresponding sensor CSV file in tests/data/external/aware_csv . Create the CSV if it does not exist. Link raw data. Make sure that you link the new raw data to a participant by using the same device_id in the data and in [DEVICE_IDS] inside their participant file ( tests/data/external/participant_files/testXX.yaml ). Create the participant file if it does not exist, and don\u2019t forget to edit [PIDS] in the config file of the time segments you are testing (see below). For simplicity, we use a participant\u2019s id ( testXX ) as their device_id . Edit the config file. Activate the sensor provider you are testing if it isn\u2019t already. Set [SENSOR][PROVIDER][COMPUTE] to TRUE in the config.yaml of the time segments you are testing: - tests/settings/frequency_config.yaml # For frequency time segments - tests/settings/periodic_config.yaml # For periodic time segments # We have not tested events time segments yet Run the pipeline and tests. You can run all time segments pipelines and their tests tests/scripts/run_tests.sh -t all You can run only the pipeline of a specific time segment and its tests tests/scripts/run_tests.sh -t frequency -a both Or, if you are working on your tests and you want to run a pipeline and its tests independently tests/scripts/run_tests.sh -t frequency -a run tests/scripts/run_tests.sh -t frequency -a test","title":"Steps for Testing"},{"location":"developers/testing/#output-example","text":"The following is a snippet of the output you should see after running your test. test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... 
periodic ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... frequency ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... frequency FAIL The results above show that for periodic both test_sensors_files_exist and test_sensors_features_calculations passed, while for frequency the first test test_sensors_files_exist passed but test_sensors_features_calculations failed. Additionally, you should get the traceback of the failure (not shown here). For more information on how to implement test scripts and use unittest please see Unittest Documentation. Testing of the RAPIDS sensors and features is a work-in-progress. Please see Test Cases for a list of sensors and features that have testing currently available.","title":"Output example"},{"location":"developers/testing/#how-do-we-execute-the-tests","text":"This bash script tests/scripts/run_tests.sh executes one or all pipelines for different time segment types ( frequency , periodic , and events ) as well as their tests (see below). This Python script tests/scripts/run_tests.py runs the tests. It parses the involved participants and active sensor providers in the config.yaml file of the time segment type being tested. We test that the output file we expect exists and that its content matches the expected values. 
Example of raw data for PHONE_APPLICATIONS_FOREGROUND testing 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 \"timestamp\" , \"device_id\" , \"package_name\" , \"application_name\" , \"is_system_app\" 1593946320761 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593961974942 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593958144033 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593947228964 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593951572326 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593950554868 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593964799620 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593974241305 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593969483540 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593977289581 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593970763367 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593964867720 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593974942995 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593986399351 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1594000139073 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593994717099 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593985854872 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1594003154390 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1594003853415 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593991680045 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 
0 1594007999202 , \"test01\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1593939733998 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593933324739 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593925161482 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593936918763 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593924155524 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1593922625358 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593943199317 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593951550550 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593981544544 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593999779779 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593933565565 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475200000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602475200000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602475999000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475999000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602477000000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602477000000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602477000000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478000000 , \"test05\" , 
\"com.facebook.moments\" , \"Facebook Moments\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602480500000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube 
Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559800000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602559800000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602561599000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602561599000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563400000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563500000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0","title":"How do we execute the tests?"},{"location":"developers/testing/#what-cases-do-we-test","text":"The sample data includes 7 tests cases. Take phone battery as an example, on this platform, battery status 2 represents charging and battery status 4 represents discharge . 1. 
A daily segment instance with no battery episodes Example Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 00:08:10.415 per_ios 4 80 100 4170 23 0 2 Li-ion 00:17:38.602 per_ios 4 77 100 4157 23 0 2 Li-ion 03:20:30.415 per_ios 2 77 100 4170 23 0 2 Li-ion 03:30:35.875 per_ios 2 80 100 4157 23 0 2 Li-ion Output results local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 00:00:00,00:29:59 thirtyminutes0000 2020-07-01 00:00:00 2020-07-01 00:29:59 1 21.8259833333333 0.137450851775292 0.137450851775292 0 0 03:00:00,03:29:59 thirtyminutes0006 2020-07-01 03:00:00 2020-07-01 03:29:59 0 0 0 0 1 9.49288333333333 Since there is no battery episode between 00:30:00 and 03:00:00, no result will be generated for this epoch. 2. 
A daily segment instance with two battery episodes (one charging, one discharge) Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 17:59:41.434 per_ios 4 59 100 4094 23 0 2 Li-ion 18:04:14.321 per_ios 4 58 100 4157 23 0 2 Li-ion 18:07:24.456 per_ios 4 57 100 4157 23 0 2 Li-ion 20:03:03.415 per_ios 2 72 100 4170 23 0 2 Li-ion 20:05:12.434 per_ios 2 73 100 4094 23 0 2 Li-ion 20:07:24.678 per_ios 2 74 100 4157 23 0 2 Li-ion 20:10:34.875 per_ios 2 75 100 4157 23 0 2 Li-ion 21:30:04.415 per_ios 4 74 100 4170 23 0 2 Li-ion 21:32:14.434 per_ios 4 73 100 4094 23 0 2 Li-ion 21:35:23.678 per_ios 4 72 100 4157 23 0 2 Li-ion 21:37:47.875 per_ios 4 71 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 18:00:00,23:59:59 evening 2020-07-01 18:00:00 2020-07-01 23:59:59 2 75.1306166666666 0.0664958369201784 0.079525673538274 1 37.5236666666667 Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 20:10:34.875 fre_ios 2 75 100 4157 23 0 2 Li-ion 20:20:17.171 fre_ios 4 74 100 4170 23 0 2 Li-ion Output results local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge 20:00:00,20:29:59 thirtyminutes0040 2020-07-01 20:00:00 2020-07-01 20:29:59 1 
14.6351666666667 0.0683285693136395 0.0683285693136395 1 12.3074 3. A daily segment instance with a charging episode that spans to the next daily instance Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 11:59:28.434 per_ios 2 63 100 4094 23 0 2 Li-ion 12:04:37.678 per_ios 2 64 100 4157 23 0 2 Li-ion Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 11:59:28.434 fre_ios 2 63 100 4094 23 0 2 Li-ion 12:04:37.678 fre_ios 2 64 100 4157 23 0 2 Li-ion 4. A daily segment instance with a discharge episode that spans to the next daily instance Periodic (daily) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 05:59:49.434 per_ios 4 79 100 4094 23 0 2 Li-ion 06:02:19.321 per_ios 4 78 100 4157 23 0 2 Li-ion Frequency (30 mins) Input time segments: timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 17:59:41.434 fre_ios 4 59 100 4094 23 0 2 Li-ion 18:04:14.321 fre_ios 4 58 100 4157 23 0 2 Li-ion 5. 
Three-day segments that repeat everyday Time segment tested: label start_time length repeats_on repeats_value daily 00:00:00 23H 59M 59S every_day 0 Data tested: We test 14 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-07-02 00:03:47.875 per_and 3 63 100 4157 23 0 2 Li-ion 2020-07-02 00:05:47.875 per_and 3 62 100 4157 23 0 2 Li-ion 2020-07-02 23:55:47.875 per_and 3 55 100 4157 23 0 2 Li-ion 2020-07-02 23:59:47.875 per_and 3 54 100 4157 23 0 2 Li-ion 2020-07-03 00:06:47.875 per_and 3 53 100 4157 23 0 2 Li-ion 2020-07-03 00:09:47.875 per_and 3 52 100 4157 23 0 2 Li-ion 2020-07-03 23:47:05.000 per_and 3 60 100 4157 23 0 2 Li-ion 2020-07-03 23:55:05.000 per_and 3 59 100 4157 23 0 2 Li-ion 2020-07-04 00:15:05.000 per_and 3 58 100 4157 23 0 2 Li-ion 2020-07-04 00:18:05.000 per_and 3 57 100 4157 23 0 2 Li-ion 2020-07-04 23:51:00.000 per_and 3 41 100 4157 23 0 2 Li-ion 2020-07-04 23:57:00.000 per_and 3 40 100 4157 23 0 2 Li-ion 2020-07-05 00:21:00.000 per_and 3 39 100 4157 23 0 2 Li-ion 2020-07-05 00:23:00.000 per_and 3 38 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge threeday#2020-07-02 00:00:00,2020-07-04 23:59:59 threeday 2020-07-02 00:00:00 2020-07-04 23:59:59 4 149.7954 0.0710868450815781 0.111113168762384 0 0 threeday#2020-07-03 00:00:00,2020-07-05 23:59:59 threeday 2020-07-03 00:00:00 2020-07-05 23:59:59 3 162.7952 0.0492745931499224 0.0502547286558745 0 0 threeday#2020-07-04 00:00:00,2020-07-06 23:59:59 threeday 2020-07-04 00:00:00 2020-07-06 23:59:59 2 110.0815 
0.0449915246814979 0.0483879032392475 0 0 threeday#2020-07-05 00:00:00,2020-07-07 23:59:59 threeday 2020-07-05 00:00:00 2020-07-07 23:59:59 1 52.9991166666667 0.0377364779979038 0.0377364779979038 0 0 6. A three-day segment that repeats on a fixed day Time segment tested: label start_time length repeats_on repeats_value weekends 00:00:00 2D 23H 59M 59S wday 5 Data tested: We test 10 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-07-03 00:06:47.875 per_and 3 53 100 4157 23 0 2 Li-ion 2020-07-03 00:09:47.875 per_and 3 52 100 4157 23 0 2 Li-ion 2020-07-03 23:47:05.000 per_and 3 60 100 4157 23 0 2 Li-ion 2020-07-03 23:55:05.000 per_and 3 59 100 4157 23 0 2 Li-ion 2020-07-04 00:15:05.000 per_and 3 58 100 4157 23 0 2 Li-ion 2020-07-04 00:18:05.000 per_and 3 57 100 4157 23 0 2 Li-ion 2020-07-04 23:51:00.000 per_and 3 41 100 4157 23 0 2 Li-ion 2020-07-04 23:57:00.000 per_and 3 40 100 4157 23 0 2 Li-ion 2020-07-05 00:21:00.000 per_and 3 39 100 4157 23 0 2 Li-ion 2020-07-05 00:23:00.000 per_and 3 38 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_avgconsumptionrate phone_battery_rapids_maxconsumptionrate phone_battery_rapids_countcharge phone_battery_rapids_sumdurationcharge weekends#2020-07-03 00:00:00,2020-07-05 23:59:59 weekends 2020-07-03 00:00:00 2020-07-05 23:59:59 3 162.7952 0.0492745931499224 0.0502547286558745 0 0 7. 
Event segements Time segments tested: label event_timestamp length shift shift_direction device_id survey1 1587661220000 10H 10H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2 1587661220000 10H 5H -1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey3 1587661220000 10H 0H 1 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 Data tested: We test 7 segments, one at the beginning of the first day, one at the end of the last day timestamp device_id battery_status battery_level battery_scale battery_voltage battery_temperature battery_adaptor battery_health battery_technology 2020-04-23 03:15:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 90 100 4157 23 0 2 Li-ion 2020-04-23 03:21:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 89 100 4157 23 0 2 Li-ion 2020-04-23 07:50:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 80 100 4157 23 0 2 Li-ion 2020-04-23 08:05:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 79 100 4157 23 0 2 Li-ion 2020-04-23 08:12:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 78 100 4157 23 0 2 Li-ion 2020-04-23 22:50:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 50 100 4157 23 0 2 Li-ion 2020-04-23 22:53:00.000 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 3 49 100 4157 23 0 2 Li-ion Output results: local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_battery_rapids_sumdurationcharge phone_battery_rapids_countdischarge phone_battery_rapids_sumdurationdischarge phone_battery_rapids_maxconsumptionrate phone_battery_rapids_avgconsumptionrate phone_battery_rapids_countcharge survey1#2020-04-23 03:00:20,2020-04-23 13:00:20 survey1 2020-04-23 03:00:20 2020-04-23 13:00:20 0 2 87.9985333333333 0.0384621794978634 0.0331202101231602 0 survey2#2020-04-23 08:00:20,2020-04-23 18:00:20 survey2 2020-04-23 08:00:20 2020-04-23 18:00:20 0 1 41.6659833333333 0.0480007872129103 0.0480007872129103 0 survey3#2020-04-23 13:00:20,2020-04-23 23:00:20 survey3 2020-04-23 13:00:20 2020-04-23 23:00:20 0 1 10.3498 0.0966202245454018 0.0966202245454018 0","title":"What cases 
do we test?"},{"location":"developers/validation-schema-config/","text":"Validation schema of config.yaml \u00b6 Why do we need to validate the config.yaml ? Most of the key/values in the config.yaml are constrained to a set of possible values or types. For example [TIME_SEGMENTS][TYPE] can only be one of [\"FREQUENCY\", \"PERIODIC\", \"EVENT\"] , and [TIMEZONE] has to be a string. We should show the user an error if that\u2019s not the case. We could validate this in Python or R but since we reuse scripts and keys in multiple places, tracking these validations can be time consuming and get out of control. Thus, we do these validations through a schema and check that schema before RAPIDS starts processing any data so the user can see the error right away. Keep in mind these validations can only cover certain base cases. Some validations that require more complex logic should still be done in the respective script. For example, we can check that a CSV file path actually ends in .csv but we can only check that the file actually exists in a Python script. The structure and values of the config.yaml file are validated using a YAML schema stored in tools/config.schema.yaml . Each key in config.yaml , for example PIDS , has a corresponding entry in the schema where we can validate its type, possible values, required properties, min and max values, among other things. The config.yaml is validated against the schema every time RAPIDS runs (see the top of the Snakefile ): validate ( config , \"tools/config.schema.yaml\" ) Structure of the schema \u00b6 The schema has three main sections required , definitions , and properties . All of them are just nested key/value YAML pairs, where the value can be a primitive type ( integer , string , boolean , number ) or can be another key/value pair ( object ). required \u00b6 required lists properties that should be present in the config.yaml . 
We will almost always add every config.yaml key to this list (meaning that the user cannot delete any of those keys like TIMEZONE or PIDS ). definitions \u00b6 definitions lists key/values that are common to different properties so we can reuse them. You can define a key/value under definitions and use $ref to refer to it in any property . For example, every sensor like [PHONE_ACCELEROMETER] has one or more providers like RAPIDS and PANDA , these providers have some common properties like the COMPUTE flag or the SRC_SCRIPT string. Therefore we define a shared provider \u201ctemplate\u201d that is used by every provider and extended with properties exclusive to each one of them. For example: provider definition (template) The PROVIDER definition will be used later on different properties . PROVIDER : type : object required : [ COMPUTE , SRC_SCRIPT , FEATURES ] properties : COMPUTE : type : boolean FEATURES : type : [ array , object ] SRC_SCRIPT : type : string pattern : \"^.*\\\\.(py|R)$\" provider reusing and extending the template Notice that RAPIDS (a provider) uses and extends the PROVIDER template in this example. The FEATURES key is overriding the FEATURES key from the #/definitions/PROVIDER template but is keeping the validation for COMPUTE , and SRC_SCRIPT . For more details about reusing properties, go to this link PHONE_ACCELEROMETER : type : object # .. other properties PROVIDERS : type : [ \"null\" , object ] properties : RAPIDS : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : FEATURES : type : array uniqueItems : True items : type : string enum : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] properties \u00b6 properties are nested key/values that describe the different components of our config.yaml file. Values can be of one or more primitive types like string , number , array , boolean and null . 
Values can also be another key/value pair (of type object ) that are similar to a dictionary in Python. For example, the following property validates the PIDS of our config.yaml . It checks that PIDS is an array with unique items of type string . PIDS : type : array uniqueItems : True items : type : string Modifying the schema \u00b6 Validating the config.yaml during development If you updated the schema and want to check the config.yaml is compliant, you can run the command snakemake --list-params-changes . You will see Building DAG of jobs... if there are no problems or an error message otherwise (try setting any COMPUTE flag to a string like test instead of False/True ). You can use this command without having to configure RAPIDS to process any participants or sensors. You can validate different aspects of each key/value in our config.yaml file: number/integer Including min and max values MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS : type : number minimum : 0 maximum : 1 FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD : type : integer exclusiveMinimum : 0 string Including valid values ( enum ) items : type : string enum : [ \"count\" , \"maxlux\" , \"minlux\" , \"avglux\" , \"medianlux\" , \"stdlux\" ] boolean MINUTES_DATA_USED : type : boolean array Including whether or not it should have unique values, the type of the array\u2019s elements ( strings , numbers ) and valid values ( enum ). MESSAGES_TYPES : type : array uniqueItems : True items : type : string enum : [ \"received\" , \"sent\" ] object PARENT is an object that has two properties. KID1 is one of those properties that are, in turn, another object that will reuse the \"#/definitions/PROVIDER\" definition AND also include (extend) two extra properties GRAND_KID1 of type array and GRAND_KID2 of type number . KID2 is another property of PARENT of type boolean . 
The schema validation looks like this PARENT : type : object properties : KID1 : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : GRAND_KID1 : type : array uniqueItems : True GRAND_KID2 : type : number KID2 : type : boolean The config.yaml key that the previous schema validates looks like this: PARENT : KID1 : # These four come from the `PROVIDER` definition (template) COMPUTE : False FEATURES : [ x , y ] # an array SRC_SCRIPT : \"a path to a py or R script\" # This two come from the extension GRAND_KID1 : [ a , b ] # an array GRAND_KID2 : 5.1 # an number KID2 : True # a boolean Verifying the schema is correct \u00b6 We recommend that before you start modifying the schema you modify the config.yaml key that you want to validate with an invalid value. For example, if you want to validate that COMPUTE is boolean, you set COMPUTE: 123 . Then create your validation, run snakemake --list-params-changes and make sure your validation fails (123 is not boolean ), and then set the key to the correct value. In other words, make sure it\u2019s broken first so that you know that your validation works. Warning Be careful . You can check that the schema config.schema.yaml has a valid format by running python tools/check_schema.py . You will see this message if its structure is correct: Schema is OK . However, we don\u2019t have a way to detect typos, for example allOf will work but allOF won\u2019t (capital F ) and it won\u2019t show any error. That\u2019s why we recommend to start with an invalid key/value in your config.yaml so that you can be sure the schema validation finds the problem. Useful resources \u00b6 Read the following links to learn more about what we can validate with schemas. They are based on JSON instead of YAML schemas but the same concepts apply. 
Understanding JSON Schemas Specification of the JSON schema we use","title":"Validation schema of config.yaml"},{"location":"developers/validation-schema-config/#validation-schema-of-configyaml","text":"Why do we need to validate the config.yaml ? Most of the key/values in the config.yaml are constrained to a set of possible values or types. For example [TIME_SEGMENTS][TYPE] can only be one of [\"FREQUENCY\", \"PERIODIC\", \"EVENT\"] , and [TIMEZONE] has to be a string. We should show the user an error if that\u2019s not the case. We could validate this in Python or R but since we reuse scripts and keys in multiple places, tracking these validations can be time consuming and get out of control. Thus, we do these validations through a schema and check that schema before RAPIDS starts processing any data so the user can see the error right away. Keep in mind these validations can only cover certain base cases. Some validations that require more complex logic should still be done in the respective script. For example, we can check that a CSV file path actually ends in .csv but we can only check that the file actually exists in a Python script. The structure and values of the config.yaml file are validated using a YAML schema stored in tools/config.schema.yaml . Each key in config.yaml , for example PIDS , has a corresponding entry in the schema where we can validate its type, possible values, required properties, min and max values, among other things. The config.yaml is validated against the schema every time RAPIDS runs (see the top of the Snakefile ): validate ( config , \"tools/config.schema.yaml\" )","title":"Validation schema of config.yaml"},{"location":"developers/validation-schema-config/#structure-of-the-schema","text":"The schema has three main sections required , definitions , and properties . 
All of them are just nested key/value YAML pairs, where the value can be a primitive type ( integer , string , boolean , number ) or can be another key/value pair ( object ).","title":"Structure of the schema"},{"location":"developers/validation-schema-config/#required","text":"required lists properties that should be present in the config.yaml . We will almost always add every config.yaml key to this list (meaning that the user cannot delete any of those keys like TIMEZONE or PIDS ).","title":"required"},{"location":"developers/validation-schema-config/#definitions","text":"definitions lists key/values that are common to different properties so we can reuse them. You can define a key/value under definitions and use $ref to refer to it in any property . For example, every sensor like [PHONE_ACCELEROMETER] has one or more providers like RAPIDS and PANDA , these providers have some common properties like the COMPUTE flag or the SRC_SCRIPT string. Therefore we define a shared provider \u201ctemplate\u201d that is used by every provider and extended with properties exclusive to each one of them. For example: provider definition (template) The PROVIDER definition will be used later on different properties . PROVIDER : type : object required : [ COMPUTE , SRC_SCRIPT , FEATURES ] properties : COMPUTE : type : boolean FEATURES : type : [ array , object ] SRC_SCRIPT : type : string pattern : \"^.*\\\\.(py|R)$\" provider reusing and extending the template Notice that RAPIDS (a provider) uses and extends the PROVIDER template in this example. The FEATURES key is overriding the FEATURES key from the #/definitions/PROVIDER template but is keeping the validation for COMPUTE , and SRC_SCRIPT . For more details about reusing properties, go to this link PHONE_ACCELEROMETER : type : object # .. 
other properties PROVIDERS : type : [ \"null\" , object ] properties : RAPIDS : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : FEATURES : type : array uniqueItems : True items : type : string enum : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ]","title":"definitions"},{"location":"developers/validation-schema-config/#properties","text":"properties are nested key/values that describe the different components of our config.yaml file. Values can be of one or more primitive types like string , number , array , boolean and null . Values can also be another key/value pair (of type object ) that are similar to a dictionary in Python. For example, the following property validates the PIDS of our config.yaml . It checks that PIDS is an array with unique items of type string . PIDS : type : array uniqueItems : True items : type : string","title":"properties"},{"location":"developers/validation-schema-config/#modifying-the-schema","text":"Validating the config.yaml during development If you updated the schema and want to check the config.yaml is compliant, you can run the command snakemake --list-params-changes . You will see Building DAG of jobs... if there are no problems or an error message otherwise (try setting any COMPUTE flag to a string like test instead of False/True ). You can use this command without having to configure RAPIDS to process any participants or sensors. 
You can validate different aspects of each key/value in our config.yaml file: number/integer Including min and max values MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS : type : number minimum : 0 maximum : 1 FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD : type : integer exclusiveMinimum : 0 string Including valid values ( enum ) items : type : string enum : [ \"count\" , \"maxlux\" , \"minlux\" , \"avglux\" , \"medianlux\" , \"stdlux\" ] boolean MINUTES_DATA_USED : type : boolean array Including whether or not it should have unique values, the type of the array\u2019s elements ( strings , numbers ) and valid values ( enum ). MESSAGES_TYPES : type : array uniqueItems : True items : type : string enum : [ \"received\" , \"sent\" ] object PARENT is an object that has two properties. KID1 is one of those properties that are, in turn, another object that will reuse the \"#/definitions/PROVIDER\" definition AND also include (extend) two extra properties GRAND_KID1 of type array and GRAND_KID2 of type number . KID2 is another property of PARENT of type boolean . The schema validation looks like this PARENT : type : object properties : KID1 : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : GRAND_KID1 : type : array uniqueItems : True GRAND_KID2 : type : number KID2 : type : boolean The config.yaml key that the previous schema validates looks like this: PARENT : KID1 : # These four come from the `PROVIDER` definition (template) COMPUTE : False FEATURES : [ x , y ] # an array SRC_SCRIPT : \"a path to a py or R script\" # This two come from the extension GRAND_KID1 : [ a , b ] # an array GRAND_KID2 : 5.1 # an number KID2 : True # a boolean","title":"Modifying the schema"},{"location":"developers/validation-schema-config/#verifying-the-schema-is-correct","text":"We recommend that before you start modifying the schema you modify the config.yaml key that you want to validate with an invalid value. 
For example, if you want to validate that COMPUTE is boolean, you set COMPUTE: 123 . Then create your validation, run snakemake --list-params-changes and make sure your validation fails (123 is not boolean ), and then set the key to the correct value. In other words, make sure it\u2019s broken first so that you know that your validation works. Warning Be careful . You can check that the schema config.schema.yaml has a valid format by running python tools/check_schema.py . You will see this message if its structure is correct: Schema is OK . However, we don\u2019t have a way to detect typos, for example allOf will work but allOF won\u2019t (capital F ) and it won\u2019t show any error. That\u2019s why we recommend to start with an invalid key/value in your config.yaml so that you can be sure the schema validation finds the problem.","title":"Verifying the schema is correct"},{"location":"developers/validation-schema-config/#useful-resources","text":"Read the following links to learn more about what we can validate with schemas. They are based on JSON instead of YAML schemas but the same concepts apply. Understanding JSON Schemas Specification of the JSON schema we use","title":"Useful resources"},{"location":"developers/virtual-environments/","text":"Python Virtual Environment \u00b6 Add new packages \u00b6 Try to install any new package using conda install -c CHANNEL PACKAGE_NAME (you can use pip if the package is only available there). Make sure your Python virtual environment is active ( conda activate YOUR_ENV ). 
Remove packages \u00b6 Uninstall packages using the same manager you used to install them conda remove PACKAGE_NAME or pip uninstall PACKAGE_NAME Updating all packages \u00b6 Make sure your Python virtual environment is active ( conda activate YOUR_ENV ), then run conda update --all Update your conda environment.yaml \u00b6 After installing or removing a package you can use the following command in your terminal to update your environment.yaml before publishing your pipeline. Note that we ignore the package version for libfortran and mkl to keep compatibility with Linux: conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml R Virtual Environment \u00b6 Add new packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::install(\"PACKAGE_NAME\") Remove packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::remove(\"PACKAGE_NAME\") Updating all packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::update() Update your R renv.lock \u00b6 After installing or removing a package you can use the following command in your terminal to update your renv.lock before publishing your pipeline. Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::snapshot() (renv will ask you to confirm any updates to this file)","title":"Virtual Environments"},{"location":"developers/virtual-environments/#python-virtual-environment","text":"","title":"Python Virtual Environment"},{"location":"developers/virtual-environments/#add-new-packages","text":"Try to install any new package using conda install -c CHANNEL PACKAGE_NAME (you can use pip if the package is only available there). 
Make sure your Python virtual environment is active ( conda activate YOUR_ENV ).","title":"Add new packages"},{"location":"developers/virtual-environments/#remove-packages","text":"Uninstall packages using the same manager you used to install them conda remove PACKAGE_NAME or pip uninstall PACKAGE_NAME","title":"Remove packages"},{"location":"developers/virtual-environments/#updating-all-packages","text":"Make sure your Python virtual environment is active ( conda activate YOUR_ENV ), then run conda update --all","title":"Updating all packages"},{"location":"developers/virtual-environments/#update-your-conda-environmentyaml","text":"After installing or removing a package you can use the following command in your terminal to update your environment.yaml before publishing your pipeline. Note that we ignore the package version for libfortran and mkl to keep compatibility with Linux: conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml","title":"Update your conda environment.yaml"},{"location":"developers/virtual-environments/#r-virtual-environment","text":"","title":"R Virtual Environment"},{"location":"developers/virtual-environments/#add-new-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::install(\"PACKAGE_NAME\")","title":"Add new packages"},{"location":"developers/virtual-environments/#remove-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::remove(\"PACKAGE_NAME\")","title":"Remove packages"},{"location":"developers/virtual-environments/#updating-all-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::update()","title":"Updating all packages"},{"location":"developers/virtual-environments/#update-your-r-renvlock","text":"After installing or removing a package you 
can use the following command in your terminal to update your renv.lock before publishing your pipeline. Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::snapshot() (renv will ask you to confirm any updates to this file)","title":"Update your R renv.lock"},{"location":"features/add-new-features/","text":"Add New Features \u00b6 Hint We recommend reading the Behavioral Features Introduction before reading this page. You can implement new features in Python or R scripts. You won\u2019t have to deal with time zones, dates, times, data cleaning, or preprocessing. The data that RAPIDS pipes to your feature extraction code are ready to process. New Features for Existing Sensors \u00b6 You can add new features to any existing sensors (see list below) by adding a new provider in three steps: Modify the config.yaml file Create your feature provider script Implement your features extraction code As a tutorial, we will add a new provider for PHONE_ACCELEROMETER called VEGA that extracts feature1 , feature2 , feature3 with a Python script that requires a parameter from the user called MY_PARAMETER . 
Existing Sensors An existing sensor of any device with a configuration entry in config.yaml : Smartphone (AWARE) Phone Accelerometer Phone Activity Recognition Phone Applications Crashes Phone Applications Foreground Phone Applications Notifications Phone Battery Phone Bluetooth Phone Calls Phone Conversation Phone Data Yield Phone Keyboard Phone Light Phone Locations Phone Log Phone Messages Phone Screen Phone WiFI Connected Phone WiFI Visible Fitbit Fitbit Data Yield Fitbit Heart Rate Summary Fitbit Heart Rate Intraday Fitbit Sleep Summary Fitbit Sleep Intraday Fitbit Steps Summary Fitbit Steps Intraday Empatica Empatica Accelerometer Empatica Heart Rate Empatica Temperature Empatica Electrodermal Activity Empatica Blood Volume Pulse Empatica Inter Beat Interval Empatica Tags Modify the config.yaml file \u00b6 In this step, you need to add your provider configuration section under the relevant sensor in config.yaml . See our example for our tutorial\u2019s VEGA provider for PHONE_ACCELEROMETER : Example configuration for a new accelerometer provider VEGA PHONE_ACCELEROMETER : CONTAINER : accelerometer PROVIDERS : RAPIDS : # this is a feature provider COMPUTE : False ... PANDA : # this is another feature provider COMPUTE : False ... VEGA : # this is our new feature provider COMPUTE : False FEATURES : [ \"feature1\" , \"feature2\" , \"feature3\" ] MY_PARAMTER : a_string SRC_SCRIPT : src/features/phone_accelerometer/vega/main.py Key Description [COMPUTE] Flag to activate/deactivate your provider [FEATURES] List of features your provider supports. Your provider code should only return the features on this list [MY_PARAMTER] An arbitrary parameter that our example provider VEGA needs. This can be a boolean, integer, float, string, or an array of any of such types. [SRC_SCRIPT] The relative path from RAPIDS\u2019 root folder to an script that computes the features for this provider. It can be implemented in R or Python. 
Create a feature provider script \u00b6 Create your feature Python or R script called main.py or main.R in the correct folder, src/feature/[sensorname]/[providername]/ . RAPIDS automatically loads and executes it based on the config key [SRC_SCRIPT] you added in the last step. For our example, this script is: src/feature/phone_accelerometer/vega/main.py Implement your feature extraction code \u00b6 Every feature script ( main.[py|R] ) needs a [providername]_features function with specific parameters. RAPIDS calls this function with the sensor data ready to process and with other functions and arguments you will need. Python function def [ providername ] _features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): # empty for now return ( your_features_df ) R function [ providername ] _ features <- function ( sensor_data , time_segment , provider ){ # empty for now return ( your_features_df ) } Parameter Description sensor_data_files Path to the CSV file containing the data of a single participant. This data has been cleaned and preprocessed. Your function will be automatically called for each participant in your study (in the [PIDS] array in config.yaml ) time_segment The label of the time segment that should be processed. provider The parameters you configured for your provider in config.yaml will be available in this variable as a dictionary in Python or a list in R. In our example this dictionary contains {MY_PARAMETER:\"a_string\"} filter_data_by_segment Python only. A function that you will use to filter your data. In R this function is already available in the environment. *args Python only. Not used for now **kwargs Python only. Not used for now The next step is to implement the code that computes your behavioral features in your provider script\u2019s function. As with any other script, this function can call other auxiliary methods, but in general terms, it should have three stages: 1. 
Read a participant\u2019s data by loading the CSV data stored in the file pointed by sensor_data_files acc_data = pd . read_csv ( sensor_data_files [ \"sensor_data\" ]) Note that the phone\u2019s battery, screen, and activity recognition data are given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) 2. Filter your data to process only those rows that belong to time_segment This step is only one line of code, but keep reading to understand why we need it. acc_data = filter_data_by_segment ( acc_data , time_segment ) You should use the filter_data_by_segment() function to process and group those rows that belong to each of the time segments RAPIDS could be configured with . Let\u2019s understand the filter_data_by_segment() function with an example. A RAPIDS user can extract features on any arbitrary time segment . A time segment is a period that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and weekend basis for p01 . The labels are arbitrary, and the instances depend on the days a participant was monitored for: the daily segment could be named my_days and if p01 was monitored for 14 days, it would have 14 instances the weekly segment could be named my_weeks and if p01 was monitored for 14 days, it would have 2 instances. the weekend segment could be named my_weekends and if p01 was monitored for 14 days, it would have 2 instances. For this example, RAPIDS will call your provider function three times for p01 , once where time_segment is my_days , once where time_segment is my_weeks , and once where time_segment is my_weekends . In this example, not every row in p01 \u2018s data needs to take part in the feature computation for either segment and the rows need to be grouped differently. 
Thus filter_data_by_segment() comes in handy, it will return a data frame that contains the rows that were logged during a time segment plus an extra column called local_segment . This new column will have as many unique values as time segment instances exist (14, 2, and 2 for our p01 \u2018s my_days , my_weeks , and my_weekends examples). After filtering, you should group the data frame by this column and compute any desired features , for example: acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () The reason RAPIDS does not filter the participant\u2019s data set for you is because your code might need to compute something based on a participant\u2019s complete dataset before computing their features. For example, you might want to identify the number that called a participant the most throughout the study before computing a feature with the number of calls the participant received from that number. 3. Return a data frame with your features After filtering, grouping your data, and computing your features, your provider function should return a data frame that has: One row per time segment instance (e.g., 14 our p01 \u2018s my_days example) The local_segment column added by filter_data_by_segment() One column per feature. The name of your features should only contain letters or numbers ( feature1 ) by convention. RAPIDS automatically adds the correct sensor and provider prefix; in our example, this prefix is phone_accelerometr_vega_ . PHONE_ACCELEROMETER Provider Example For your reference, this our own provider ( RAPIDS ) for PHONE_ACCELEROMETER that computes five acceleration features import pandas as pd import numpy as np def rapids_features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): acc_data = pd . 
read_csv ( sensor_data_files [ \"sensor_data\" ]) requested_features = provider [ \"FEATURES\" ] # name of the features this function can compute base_features_names = [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] # the subset of requested features this function can compute features_to_compute = list ( set ( requested_features ) & set ( base_features_names )) acc_features = pd . DataFrame ( columns = [ \"local_segment\" ] + features_to_compute ) if not acc_data . empty : acc_data = filter_data_by_segment ( acc_data , time_segment ) if not acc_data . empty : acc_features = pd . DataFrame () # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) magnitude = acc_data . apply ( lambda row : np . sqrt ( row [ \"double_values_0\" ] ** 2 + row [ \"double_values_1\" ] ** 2 + row [ \"double_values_2\" ] ** 2 ), axis = 1 ) acc_data = acc_data . assign ( magnitude = magnitude . values ) if \"maxmagnitude\" in features_to_compute : acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () if \"minmagnitude\" in features_to_compute : acc_features [ \"minmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . min () if \"avgmagnitude\" in features_to_compute : acc_features [ \"avgmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . mean () if \"medianmagnitude\" in features_to_compute : acc_features [ \"medianmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . median () if \"stdmagnitude\" in features_to_compute : acc_features [ \"stdmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . std () acc_features = acc_features . 
reset_index () return acc_features New Features for Non-Existing Sensors \u00b6 If you want to add features for a device or a sensor that we do not support at the moment (those that do not appear in the \"Existing Sensors\" list above), contact us or request it on Slack and we can add the necessary code so you can follow the instructions above.","title":"Add New Features"},{"location":"features/add-new-features/#add-new-features","text":"Hint We recommend reading the Behavioral Features Introduction before reading this page. You can implement new features in Python or R scripts. You won\u2019t have to deal with time zones, dates, times, data cleaning, or preprocessing. The data that RAPIDS pipes to your feature extraction code are ready to process.","title":"Add New Features"},{"location":"features/add-new-features/#new-features-for-existing-sensors","text":"You can add new features to any existing sensors (see list below) by adding a new provider in three steps: Modify the config.yaml file Create your feature provider script Implement your features extraction code As a tutorial, we will add a new provider for PHONE_ACCELEROMETER called VEGA that extracts feature1 , feature2 , feature3 with a Python script that requires a parameter from the user called MY_PARAMETER . 
Existing Sensors An existing sensor of any device with a configuration entry in config.yaml : Smartphone (AWARE) Phone Accelerometer Phone Activity Recognition Phone Applications Crashes Phone Applications Foreground Phone Applications Notifications Phone Battery Phone Bluetooth Phone Calls Phone Conversation Phone Data Yield Phone Keyboard Phone Light Phone Locations Phone Log Phone Messages Phone Screen Phone WiFI Connected Phone WiFI Visible Fitbit Fitbit Data Yield Fitbit Heart Rate Summary Fitbit Heart Rate Intraday Fitbit Sleep Summary Fitbit Sleep Intraday Fitbit Steps Summary Fitbit Steps Intraday Empatica Empatica Accelerometer Empatica Heart Rate Empatica Temperature Empatica Electrodermal Activity Empatica Blood Volume Pulse Empatica Inter Beat Interval Empatica Tags","title":"New Features for Existing Sensors"},{"location":"features/add-new-features/#modify-the-configyaml-file","text":"In this step, you need to add your provider configuration section under the relevant sensor in config.yaml . See our example for our tutorial\u2019s VEGA provider for PHONE_ACCELEROMETER : Example configuration for a new accelerometer provider VEGA PHONE_ACCELEROMETER : CONTAINER : accelerometer PROVIDERS : RAPIDS : # this is a feature provider COMPUTE : False ... PANDA : # this is another feature provider COMPUTE : False ... VEGA : # this is our new feature provider COMPUTE : False FEATURES : [ \"feature1\" , \"feature2\" , \"feature3\" ] MY_PARAMETER : a_string SRC_SCRIPT : src/features/phone_accelerometer/vega/main.py Key Description [COMPUTE] Flag to activate/deactivate your provider [FEATURES] List of features your provider supports. Your provider code should only return the features on this list [MY_PARAMETER] An arbitrary parameter that our example provider VEGA needs. This can be a boolean, integer, float, string, or an array of any of such types. [SRC_SCRIPT] The relative path from RAPIDS\u2019 root folder to a script that computes the features for this provider. 
It can be implemented in R or Python.","title":"Modify the config.yaml file"},{"location":"features/add-new-features/#create-a-feature-provider-script","text":"Create your feature Python or R script called main.py or main.R in the correct folder, src/features/[sensorname]/[providername]/ . RAPIDS automatically loads and executes it based on the config key [SRC_SCRIPT] you added in the last step. For our example, this script is: src/features/phone_accelerometer/vega/main.py","title":"Create a feature provider script"},{"location":"features/add-new-features/#implement-your-feature-extraction-code","text":"Every feature script ( main.[py|R] ) needs a [providername]_features function with specific parameters. RAPIDS calls this function with the sensor data ready to process and with other functions and arguments you will need. Python function def [ providername ] _features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): # empty for now return ( your_features_df ) R function [ providername ] _ features <- function ( sensor_data , time_segment , provider ){ # empty for now return ( your_features_df ) } Parameter Description sensor_data_files Path to the CSV file containing the data of a single participant. This data has been cleaned and preprocessed. Your function will be automatically called for each participant in your study (in the [PIDS] array in config.yaml ) time_segment The label of the time segment that should be processed. provider The parameters you configured for your provider in config.yaml will be available in this variable as a dictionary in Python or a list in R. In our example this dictionary contains {MY_PARAMETER:\"a_string\"} filter_data_by_segment Python only. A function that you will use to filter your data. In R this function is already available in the environment. *args Python only. Not used for now **kwargs Python only. 
Not used for now The next step is to implement the code that computes your behavioral features in your provider script\u2019s function. As with any other script, this function can call other auxiliary methods, but in general terms, it should have three stages: 1. Read a participant\u2019s data by loading the CSV data stored in the file pointed by sensor_data_files acc_data = pd . read_csv ( sensor_data_files [ \"sensor_data\" ]) Note that the phone\u2019s battery, screen, and activity recognition data are given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) 2. Filter your data to process only those rows that belong to time_segment This step is only one line of code, but keep reading to understand why we need it. acc_data = filter_data_by_segment ( acc_data , time_segment ) You should use the filter_data_by_segment() function to process and group those rows that belong to each of the time segments RAPIDS could be configured with . Let\u2019s understand the filter_data_by_segment() function with an example. A RAPIDS user can extract features on any arbitrary time segment . A time segment is a period that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and weekend basis for p01 . The labels are arbitrary, and the instances depend on the days a participant was monitored for: the daily segment could be named my_days and if p01 was monitored for 14 days, it would have 14 instances the weekly segment could be named my_weeks and if p01 was monitored for 14 days, it would have 2 instances. the weekend segment could be named my_weekends and if p01 was monitored for 14 days, it would have 2 instances. For this example, RAPIDS will call your provider function three times for p01 , once where time_segment is my_days , once where time_segment is my_weeks , and once where time_segment is my_weekends . 
In this example, not every row in p01 \u2018s data needs to take part in the feature computation for either segment and the rows need to be grouped differently. Thus filter_data_by_segment() comes in handy, it will return a data frame that contains the rows that were logged during a time segment plus an extra column called local_segment . This new column will have as many unique values as time segment instances exist (14, 2, and 2 for our p01 \u2018s my_days , my_weeks , and my_weekends examples). After filtering, you should group the data frame by this column and compute any desired features , for example: acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () The reason RAPIDS does not filter the participant\u2019s data set for you is because your code might need to compute something based on a participant\u2019s complete dataset before computing their features. For example, you might want to identify the number that called a participant the most throughout the study before computing a feature with the number of calls the participant received from that number. 3. Return a data frame with your features After filtering, grouping your data, and computing your features, your provider function should return a data frame that has: One row per time segment instance (e.g., 14 in our p01 \u2018s my_days example) The local_segment column added by filter_data_by_segment() One column per feature. The name of your features should only contain letters or numbers ( feature1 ) by convention. RAPIDS automatically adds the correct sensor and provider prefix; in our example, this prefix is phone_accelerometer_vega_ . PHONE_ACCELEROMETER Provider Example For your reference, this is our own provider ( RAPIDS ) for PHONE_ACCELEROMETER that computes five acceleration features import pandas as pd import numpy as np def rapids_features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): acc_data = pd . 
read_csv ( sensor_data_files [ \"sensor_data\" ]) requested_features = provider [ \"FEATURES\" ] # name of the features this function can compute base_features_names = [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] # the subset of requested features this function can compute features_to_compute = list ( set ( requested_features ) & set ( base_features_names )) acc_features = pd . DataFrame ( columns = [ \"local_segment\" ] + features_to_compute ) if not acc_data . empty : acc_data = filter_data_by_segment ( acc_data , time_segment ) if not acc_data . empty : acc_features = pd . DataFrame () # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) magnitude = acc_data . apply ( lambda row : np . sqrt ( row [ \"double_values_0\" ] ** 2 + row [ \"double_values_1\" ] ** 2 + row [ \"double_values_2\" ] ** 2 ), axis = 1 ) acc_data = acc_data . assign ( magnitude = magnitude . values ) if \"maxmagnitude\" in features_to_compute : acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () if \"minmagnitude\" in features_to_compute : acc_features [ \"minmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . min () if \"avgmagnitude\" in features_to_compute : acc_features [ \"avgmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . mean () if \"medianmagnitude\" in features_to_compute : acc_features [ \"medianmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . median () if \"stdmagnitude\" in features_to_compute : acc_features [ \"stdmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . std () acc_features = acc_features . 
reset_index () return acc_features","title":"Implement your feature extraction code"},{"location":"features/add-new-features/#new-features-for-non-existing-sensors","text":"If you want to add features for a device or a sensor that we do not support at the moment (those that do not appear in the \"Existing Sensors\" list above), contact us or request it on Slack and we can add the necessary code so you can follow the instructions above.","title":"New Features for Non-Existing Sensors"},{"location":"features/empatica-accelerometer/","text":"Empatica Accelerometer \u00b6 Sensor parameters description for [EMPATICA_ACCELEROMETER] : Key Description [CONTAINER] Name of the CSV file containing accelerometer data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_accelerometer_raw.csv - data/raw/ { pid } /empatica_accelerometer_with_datetime.csv - data/interim/ { pid } /empatica_accelerometer_features/empatica_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_accelerometer.csv Parameters description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ACCELEROMETER features from the DBDP provider [FEATURES] Features to be computed, see table below Features description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. 
Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem, if this is something you need, get in touch and we can discuss how to implement it.","title":"Empatica Accelerometer"},{"location":"features/empatica-accelerometer/#empatica-accelerometer","text":"Sensor parameters description for [EMPATICA_ACCELEROMETER] : Key Description [CONTAINER] Name of the CSV file containing accelerometer data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Accelerometer"},{"location":"features/empatica-accelerometer/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_accelerometer_raw.csv - data/raw/ { pid } /empatica_accelerometer_with_datetime.csv - data/interim/ { pid } /empatica_accelerometer_features/empatica_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_accelerometer.csv Parameters description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ACCELEROMETER features from the DBDP provider [FEATURES] Features to be computed, see table below Features description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. 
Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem, if this is something you need, get in touch and we can discuss how to implement it.","title":"DBDP provider"},{"location":"features/empatica-blood-volume-pulse/","text":"Empatica Blood Volume Pulse \u00b6 Sensor parameters description for [EMPATICA_BLOOD_VOLUME_PULSE] : Key Description [CONTAINER] Name of the CSV file containing blood volume pulse data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_blood_volume_pulse_raw.csv - data/raw/ { pid } /empatica_blood_volume_pulse_with_datetime.csv - data/interim/ { pid } /empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_blood_volume_pulse.csv Parameters description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_BLOOD_VOLUME_PULSE features from the DBDP provider [FEATURES] Features to be computed from blood volume pulse intraday data, see table below Features description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Feature Units Description maxbvp - The maximum blood volume pulse during a time segment. minbvp - The minimum blood volume pulse during a time segment. avgbvp - The average blood volume pulse during a time segment. medianbvp - The median of blood volume pulse during a time segment. modebvp - The mode of blood volume pulse during a time segment. stdbvp - The standard deviation of blood volume pulse during a time segment. 
diffmaxmodebvp - The difference between the maximum and mode blood volume pulse during a time segment. diffminmodebvp - The difference between the mode and minimum blood volume pulse during a time segment. entropybvp nats Shannon\u2019s entropy measurement based on blood volume pulse during a time segment. Assumptions/Observations For more information about BVP read this .","title":"Empatica Blood Volume Pulse"},{"location":"features/empatica-blood-volume-pulse/#empatica-blood-volume-pulse","text":"Sensor parameters description for [EMPATICA_BLOOD_VOLUME_PULSE] : Key Description [CONTAINER] Name of the CSV file containing blood volume pulse data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Blood Volume Pulse"},{"location":"features/empatica-blood-volume-pulse/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_blood_volume_pulse_raw.csv - data/raw/ { pid } /empatica_blood_volume_pulse_with_datetime.csv - data/interim/ { pid } /empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_blood_volume_pulse.csv Parameters description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_BLOOD_VOLUME_PULSE features from the DBDP provider [FEATURES] Features to be computed from blood volume pulse intraday data, see table below Features description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Feature Units Description maxbvp - The maximum blood volume pulse during a time segment. minbvp - The minimum blood volume pulse during a time segment. avgbvp - The average blood volume pulse during a time segment. medianbvp - The median of blood volume pulse during a time segment. 
modebvp - The mode of blood volume pulse during a time segment. stdbvp - The standard deviation of blood volume pulse during a time segment. diffmaxmodebvp - The difference between the maximum and mode blood volume pulse during a time segment. diffminmodebvp - The difference between the mode and minimum blood volume pulse during a time segment. entropybvp nats Shannon\u2019s entropy measurement based on blood volume pulse during a time segment. Assumptions/Observations For more information about BVP read this .","title":"DBDP provider"},{"location":"features/empatica-electrodermal-activity/","text":"Empatica Electrodermal Activity \u00b6 Sensor parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY] : Key Description [CONTAINER] Name of the CSV file containing electrodermal activity data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_electrodermal_activity_raw.csv - data/raw/ { pid } /empatica_electrodermal_activity_with_datetime.csv - data/interim/ { pid } /empatica_electrodermal_activity_features/empatica_electrodermal_activity_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_electrodermal_activity.csv Parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ELECTRODERMAL_ACTIVITY features from the DBDP provider [FEATURES] Features to be computed from electrodermal activity intraday data, see table below Features description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Feature Units Description maxeda microsiemens The maximum electrical conductance during a time segment. mineda microsiemens The minimum electrical conductance during a time segment. 
avgeda microsiemens The average electrical conductance during a time segment. medianeda microsiemens The median of electrical conductance during a time segment. modeeda microsiemens The mode of electrical conductance during a time segment. stdeda microsiemens The standard deviation of electrical conductance during a time segment. diffmaxmodeeda microsiemens The difference between the maximum and mode electrical conductance during a time segment. diffminmodeeda microsiemens The difference between the mode and minimum electrical conductance during a time segment. entropyeda nats Shannon\u2019s entropy measurement based on electrical conductance during a time segment. Assumptions/Observations None","title":"Empatica Electrodermal Activity"},{"location":"features/empatica-electrodermal-activity/#empatica-electrodermal-activity","text":"Sensor parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY] : Key Description [CONTAINER] Name of the CSV file containing electrodermal activity data that is compressed inside an Empatica zip file. 
Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Electrodermal Activity"},{"location":"features/empatica-electrodermal-activity/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_electrodermal_activity_raw.csv - data/raw/ { pid } /empatica_electrodermal_activity_with_datetime.csv - data/interim/ { pid } /empatica_electrodermal_activity_features/empatica_electrodermal_activity_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_electrodermal_activity.csv Parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ELECTRODERMAL_ACTIVITY features from the DBDP provider [FEATURES] Features to be computed from electrodermal activity intraday data, see table below Features description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Feature Units Description maxeda microsiemens The maximum electrical conductance during a time segment. mineda microsiemens The minimum electrical conductance during a time segment. avgeda microsiemens The average electrical conductance during a time segment. medianeda microsiemens The median of electrical conductance during a time segment. modeeda microsiemens The mode of electrical conductance during a time segment. stdeda microsiemens The standard deviation of electrical conductance during a time segment. diffmaxmodeeda microsiemens The difference between the maximum and mode electrical conductance during a time segment. diffminmodeeda microsiemens The difference between the mode and minimum electrical conductance during a time segment. entropyeda nats Shannon\u2019s entropy measurement based on electrical conductance during a time segment. 
Assumptions/Observations None","title":"DBDP provider"},{"location":"features/empatica-heartrate/","text":"Empatica Heart Rate \u00b6 Sensor parameters description for [EMPATICA_HEARTRATE] : Key Description [CONTAINER] Name of the CSV file containing heart rate data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_heartrate_raw.csv - data/raw/ { pid } /empatica_heartrate_with_datetime.csv - data/interim/ { pid } /empatica_heartrate_features/empatica_heartrate_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_heartrate.csv Parameters description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_HEARTRATE features from the DBDP provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Feature Units Description maxhr beats The maximum heart rate during a time segment. minhr beats The minimum heart rate during a time segment. avghr beats The average heart rate during a time segment. medianhr beats The median of heart rate during a time segment. modehr beats The mode of heart rate during a time segment. stdhr beats The standard deviation of heart rate during a time segment. diffmaxmodehr beats The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. 
Assumptions/Observations We extract the previous features based on the average heart rate values computed in 10-second windows .","title":"Empatica Heart Rate"},{"location":"features/empatica-heartrate/#empatica-heart-rate","text":"Sensor parameters description for [EMPATICA_HEARTRATE] : Key Description [CONTAINER] Name of the CSV file containing heart rate data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Heart Rate"},{"location":"features/empatica-heartrate/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_heartrate_raw.csv - data/raw/ { pid } /empatica_heartrate_with_datetime.csv - data/interim/ { pid } /empatica_heartrate_features/empatica_heartrate_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_heartrate.csv Parameters description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_HEARTRATE features from the DBDP provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Feature Units Description maxhr beats The maximum heart rate during a time segment. minhr beats The minimum heart rate during a time segment. avghr beats The average heart rate during a time segment. medianhr beats The median of heart rate during a time segment. modehr beats The mode of heart rate during a time segment. stdhr beats The standard deviation of heart rate during a time segment. diffmaxmodehr beats The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. 
Assumptions/Observations We extract the previous features based on the average heart rate values computed in 10-second windows .","title":"DBDP provider"},{"location":"features/empatica-inter-beat-interval/","text":"Empatica Inter Beat Interval \u00b6 Sensor parameters description for [EMPATICA_INTER_BEAT_INTERVAL] : Key Description [CONTAINER] Name of the CSV file containing inter beat interval data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_inter_beat_interval_raw.csv - data/raw/ { pid } /empatica_inter_beat_interval_with_datetime.csv - data/interim/ { pid } /empatica_inter_beat_interval_features/empatica_inter_beat_interval_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_inter_beat_interval.csv Parameters description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_INTER_BEAT_INTERVAL features from the DBDP provider [FEATURES] Features to be computed from inter beat interval intraday data, see table below Features description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Feature Units Description maxibi seconds The maximum inter beat interval during a time segment. minibi seconds The minimum inter beat interval during a time segment. avgibi seconds The average inter beat interval during a time segment. medianibi seconds The median of inter beat interval during a time segment. modeibi seconds The mode of inter beat interval during a time segment. stdibi seconds The standard deviation of inter beat interval during a time segment. diffmaxmodeibi seconds The difference between the maximum and mode inter beat interval during a time segment. 
diffminmodeibi seconds The difference between the mode and minimum inter beat interval during a time segment. entropyibi nats Shannon\u2019s entropy measurement based on inter beat interval during a time segment. Assumptions/Observations For more information about IBI read this .","title":"Empatica Inter Beat Interval"},{"location":"features/empatica-inter-beat-interval/#empatica-inter-beat-interval","text":"Sensor parameters description for [EMPATICA_INTER_BEAT_INTERVAL] : Key Description [CONTAINER] Name of the CSV file containing inter beat interval data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Inter Beat Interval"},{"location":"features/empatica-inter-beat-interval/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_inter_beat_interval_raw.csv - data/raw/ { pid } /empatica_inter_beat_interval_with_datetime.csv - data/interim/ { pid } /empatica_inter_beat_interval_features/empatica_inter_beat_interval_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_inter_beat_interval.csv Parameters description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_INTER_BEAT_INTERVAL features from the DBDP provider [FEATURES] Features to be computed from inter beat interval intraday data, see table below Features description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Feature Units Description maxibi seconds The maximum inter beat interval during a time segment. minibi seconds The minimum inter beat interval during a time segment. avgibi seconds The average inter beat interval during a time segment. medianibi seconds The median of inter beat interval during a time segment. modeibi seconds The mode of inter beat interval during a time segment. 
stdibi seconds The standard deviation of inter beat interval during a time segment. diffmaxmodeibi seconds The difference between the maximum and mode inter beat interval during a time segment. diffminmodeibi seconds The difference between the mode and minimum inter beat interval during a time segment. entropyibi nats Shannon\u2019s entropy measurement based on inter beat interval during a time segment. Assumptions/Observations For more information about IBI read this .","title":"DBDP provider"},{"location":"features/empatica-tags/","text":"Empatica Tags \u00b6 Sensor parameters description for [EMPATICA_TAGS] : Key Description [CONTAINER] Name of the CSV file containing tags data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. Note No feature providers have been implemented for this sensor yet, however you can implement your own features . To know more about tags read this .","title":"Empatica Tags"},{"location":"features/empatica-tags/#empatica-tags","text":"Sensor parameters description for [EMPATICA_TAGS] : Key Description [CONTAINER] Name of the CSV file containing tags data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. Note No feature providers have been implemented for this sensor yet, however you can implement your own features . To know more about tags read this .","title":"Empatica Tags"},{"location":"features/empatica-temperature/","text":"Empatica Temperature \u00b6 Sensor parameters description for [EMPATICA_TEMPERATURE] : Key Description [CONTAINER] Name of the CSV file containing temperature data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. 
DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_temperature_raw.csv - data/raw/ { pid } /empatica_temperature_with_datetime.csv - data/interim/ { pid } /empatica_temperature_features/empatica_temperature_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_temperature.csv Parameters description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_TEMPERATURE features from the DBDP provider [FEATURES] Features to be computed from temperature intraday data, see table below Features description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Feature Units Description maxtemp degrees C The maximum temperature during a time segment. mintemp degrees C The minimum temperature during a time segment. avgtemp degrees C The average temperature during a time segment. mediantemp degrees C The median of temperature during a time segment. modetemp degrees C The mode of temperature during a time segment. stdtemp degrees C The standard deviation of temperature during a time segment. diffmaxmodetemp degrees C The difference between the maximum and mode temperature during a time segment. diffminmodetemp degrees C The difference between the mode and minimum temperature during a time segment. entropytemp nats Shannon\u2019s entropy measurement based on temperature during a time segment. Assumptions/Observations None","title":"Empatica Temperature"},{"location":"features/empatica-temperature/#empatica-temperature","text":"Sensor parameters description for [EMPATICA_TEMPERATURE] : Key Description [CONTAINER] Name of the CSV file containing temperature data that is compressed inside an Empatica zip file. 
Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Temperature"},{"location":"features/empatica-temperature/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_temperature_raw.csv - data/raw/ { pid } /empatica_temperature_with_datetime.csv - data/interim/ { pid } /empatica_temperature_features/empatica_temperature_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_temperature.csv Parameters description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_TEMPERATURE features from the DBDP provider [FEATURES] Features to be computed from temperature intraday data, see table below Features description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Feature Units Description maxtemp degrees C The maximum temperature during a time segment. mintemp degrees C The minimum temperature during a time segment. avgtemp degrees C The average temperature during a time segment. mediantemp degrees C The median of temperature during a time segment. modetemp degrees C The mode of temperature during a time segment. stdtemp degrees C The standard deviation of temperature during a time segment. diffmaxmodetemp degrees C The difference between the maximum and mode temperature during a time segment. diffminmodetemp degrees C The difference between the mode and minimum temperature during a time segment. entropytemp nats Shannon\u2019s entropy measurement based on temperature during a time segment. Assumptions/Observations None","title":"DBDP provider"},{"location":"features/feature-introduction/","text":"Behavioral Features Introduction \u00b6 A behavioral feature is a metric computed from raw sensor data quantifying the behavior of a participant. For example, the time spent at home computed based on location data. 
These are also known as digital biomarkers. RAPIDS\u2019 config.yaml has a section for each supported device/sensor (e.g., PHONE_ACCELEROMETER , FITBIT_STEPS , EMPATICA_HEARTRATE ). These sections follow a similar structure, and they can have one or more feature PROVIDERS , that compute one or more behavioral features. You will modify the parameters of these PROVIDERS to obtain features from different mobile sensors. We\u2019ll use PHONE_ACCELEROMETER as an example to explain this further. Hint We recommend reading this page if you are using RAPIDS for the first time All computed sensor features are stored under /data/processed/features on files per sensor, per participant and per study (all participants). Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. In short, to extract features offered by a provider, you need to set its [COMPUTE] flag to TRUE , configure any of its parameters, and execute RAPIDS. Explaining the config.yaml sensor sections with an example \u00b6 Each sensor section follows the same structure. Click on the numbered markers to know more. PHONE_ACCELEROMETER : # (1) CONTAINER : accelerometer # (2) PROVIDERS : # (3) RAPIDS : COMPUTE : False # (4) FEATURES : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] SRC_SCRIPT : src/features/phone_accelerometer/rapids/main.py PANDA : COMPUTE : False VALID_SENSED_MINUTES : False FEATURES : # (5) exertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] nonexertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] # (6) SRC_SCRIPT : src/features/phone_accelerometer/panda/main.py Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) 
has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. 
panda . These are the descriptions of each marker for accessibility: Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. 
PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Introduction"},{"location":"features/feature-introduction/#behavioral-features-introduction","text":"A behavioral feature is a metric computed from raw sensor data quantifying the behavior of a participant. For example, the time spent at home computed based on location data. These are also known as digital biomarkers. RAPIDS\u2019 config.yaml has a section for each supported device/sensor (e.g., PHONE_ACCELEROMETER , FITBIT_STEPS , EMPATICA_HEARTRATE ). These sections follow a similar structure, and they can have one or more feature PROVIDERS , that compute one or more behavioral features. You will modify the parameters of these PROVIDERS to obtain features from different mobile sensors. We\u2019ll use PHONE_ACCELEROMETER as an example to explain this further. Hint We recommend reading this page if you are using RAPIDS for the first time All computed sensor features are stored under /data/processed/features on files per sensor, per participant and per study (all participants). Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. In short, to extract features offered by a provider, you need to set its [COMPUTE] flag to TRUE , configure any of its parameters, and execute RAPIDS.","title":"Behavioral Features Introduction"},{"location":"features/feature-introduction/#explaining-the-configyaml-sensor-sections-with-an-example","text":"Each sensor section follows the same structure. Click on the numbered markers to know more. 
PHONE_ACCELEROMETER : # (1) CONTAINER : accelerometer # (2) PROVIDERS : # (3) RAPIDS : COMPUTE : False # (4) FEATURES : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] SRC_SCRIPT : src/features/phone_accelerometer/rapids/main.py PANDA : COMPUTE : False VALID_SENSED_MINUTES : False FEATURES : # (5) exertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] nonexertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] # (6) SRC_SCRIPT : src/features/phone_accelerometer/panda/main.py Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. 
We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda . These are the descriptions of each marker for accessibility: Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . 
PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Explaining the config.yaml sensor sections with an example"},{"location":"features/fitbit-data-yield/","text":"Fitbit Data Yield \u00b6 We use Fitbit heart rate intraday data to extract data yield features. Fitbit data yield features can be used to remove rows ( time segments ) that do not contain enough Fitbit data. You should decide what is your \u201cenough\u201d threshold depending on the time a participant was supposed to be wearing their Fitbit, the length of your study, and the rates of missing data that your analysis could handle. Why is Fitbit data yield important? Imagine that you want to extract FITBIT_STEPS_SUMMARY features on daily segments ( 00:00 to 23:59 ). 
Let\u2019s say that on day 1 the Fitbit logged 6k as the total step count and the heart rate sensor logged 24 hours of data and on day 2 the Fitbit logged 101 as the total step count and the heart rate sensor logged 2 hours of data. It\u2019s very likely that on day 2 you walked during the other 22 hours so including this day in your analysis could bias your results. Sensor parameters description for [FITBIT_DATA_YIELD] : Key Description [SENSORS] The Fitbit sensor we considered for calculating the Fitbit data yield features. We only support FITBIT_HEARTRATE_INTRADAY since sleep data is commonly collected only overnight, and step counts are 0 even when not wearing the Fitbit device. RAPIDS provider \u00b6 Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when Fitbit heart rate intraday sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_data_yield_features/fitbit_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_data_yield.csv Parameters description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. 
Features description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: (a) the 12 missing hours are from the beginning of the segment or (b) 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a . We assume your Fitbit intraday data was sampled (requested from the Fitbit API) at 1 minute intervals, if the interval is longer, for example 15 minutes, you need to take into account that valid minutes and valid hours ratios are going to be small (for example you would have at most 4 \u201cminutes\u201d of data per hour because you would have four 15-minute windows) and so you should adjust your thresholds to include and exclude rows accordingly. 
If you are in this situation, get in touch with us, we could implement this use case but we are not sure there is enough demand for it at the moment since you can control the sampling rate of the data you request from Fitbit API.","title":"Fitbit Data Yield"},{"location":"features/fitbit-data-yield/#fitbit-data-yield","text":"We use Fitbit heart rate intraday data to extract data yield features. Fitbit data yield features can be used to remove rows ( time segments ) that do not contain enough Fitbit data. You should decide what is your \u201cenough\u201d threshold depending on the time a participant was supposed to be wearing their Fitbit, the length of your study, and the rates of missing data that your analysis could handle. Why is Fitbit data yield important? Imagine that you want to extract FITBIT_STEPS_SUMMARY features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the Fitbit logged 6k as the total step count and the heart rate sensor logged 24 hours of data and on day 2 the Fitbit logged 101 as the total step count and the heart rate sensor logged 2 hours of data. It\u2019s very likely that on day 2 you walked during the other 22 hours so including this day in your analysis could bias your results. Sensor parameters description for [FITBIT_DATA_YIELD] : Key Description [SENSORS] The Fitbit sensor we considered for calculating the Fitbit data yield features. We only support FITBIT_HEARTRATE_INTRADAY since sleep data is commonly collected only overnight, and step counts are 0 even when not wearing the Fitbit device.","title":"Fitbit Data Yield"},{"location":"features/fitbit-data-yield/#rapids-provider","text":"Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when Fitbit heart rate intraday sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. 
The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_data_yield_features/fitbit_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_data_yield.csv Parameters description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. Features description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: (a) the 12 missing hours are from the beginning of the segment or (b) 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). 
However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a . We assume your Fitbit intraday data was sampled (requested from the Fitbit API) at 1 minute intervals, if the interval is longer, for example 15 minutes, you need to take into account that valid minutes and valid hours ratios are going to be small (for example you would have at most 4 \u201cminutes\u201d of data per hour because you would have four 15-minute windows) and so you should adjust your thresholds to include and exclude rows accordingly. If you are in this situation, get in touch with us, we could implement this use case but we are not sure there is enough demand for it at the moment since you can control the sampling rate of the data you request from Fitbit API.","title":"RAPIDS provider"},{"location":"features/fitbit-heartrate-intraday/","text":"Fitbit Heart Rate Intraday \u00b6 Sensor parameters description for [FITBIT_HEARTRATE_INTRADAY] : Key Description [CONTAINER] Container where your heart rate intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_intraday.csv Parameters description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description maxhr beats/mins The maximum heart rate during a time segment. minhr beats/mins The minimum heart rate during a time segment. avghr beats/mins The average heart rate during a time segment. medianhr beats/mins The median of heart rate during a time segment. modehr beats/mins The mode of heart rate during a time segment. stdhr beats/mins The standard deviation of heart rate during a time segment. diffmaxmodehr beats/mins The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats/mins The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. minutesonZONE minutes Number of minutes the user\u2019s heart rate fell within each heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . 
Please refer to Fitbit documentation for more information about the way they are computed.","title":"Fitbit Heart Rate Intraday"},{"location":"features/fitbit-heartrate-intraday/#fitbit-heart-rate-intraday","text":"Sensor parameters description for [FITBIT_HEARTRATE_INTRADAY] : Key Description [CONTAINER] Container where your heart rate intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Heart Rate Intraday"},{"location":"features/fitbit-heartrate-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_intraday.csv Parameters description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description maxhr beats/mins The maximum heart rate during a time segment. minhr beats/mins The minimum heart rate during a time segment. avghr beats/mins The average heart rate during a time segment. medianhr beats/mins The median of heart rate during a time segment. modehr beats/mins The mode of heart rate during a time segment. stdhr beats/mins The standard deviation of heart rate during a time segment. diffmaxmodehr beats/mins The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats/mins The difference between the mode and minimum heart rate during a time segment. 
entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. minutesonZONE minutes Number of minutes the user\u2019s heart rate fell within each heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed.","title":"RAPIDS provider"},{"location":"features/fitbit-heartrate-summary/","text":"Fitbit Heart Rate Summary \u00b6 Sensor parameters description for [FITBIT_HEARTRATE_SUMMARY] : Key Description [CONTAINER] Container where your heart rate summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_heartrate_summary_raw.csv - data/raw/ { pid } /fitbit_heartrate_summary_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_summary_features/fitbit_heartrate_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_summary.csv Parameters description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate summary data, see table below Features description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxrestinghr beats/mins The maximum daily resting heart rate during a time segment. minrestinghr beats/mins The minimum daily resting heart rate during a time segment. avgrestinghr beats/mins The average daily resting heart rate during a time segment. medianrestinghr beats/mins The median of daily resting heart rate during a time segment. 
moderestinghr beats/mins The mode of daily resting heart rate during a time segment. stdrestinghr beats/mins The standard deviation of daily resting heart rate during a time segment. diffmaxmoderestinghr beats/mins The difference between the maximum and mode daily resting heart rate during a time segment. diffminmoderestinghr beats/mins The difference between the mode and minimum daily resting heart rate during a time segment. entropyrestinghr nats Shannon\u2019s entropy measurement based on daily resting heart rate during a time segment. sumcaloriesZONE cals The total daily calories burned within heartrate_zone during a time segment. maxcaloriesZONE cals The maximum daily calories burned within heartrate_zone during a time segment. mincaloriesZONE cals The minimum daily calories burned within heartrate_zone during a time segment. avgcaloriesZONE cals The average daily calories burned within heartrate_zone during a time segment. mediancaloriesZONE cals The median of daily calories burned within heartrate_zone during a time segment. stdcaloriesZONE cals The standard deviation of daily calories burned within heartrate_zone during a time segment. entropycaloriesZONE nats Shannon\u2019s entropy measurement based on daily calories burned within heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed. 
Calories\u2019 accuracy depends on the users\u2019 Fitbit profile (weight, height, etc.).","title":"Fitbit Heart Rate Summary"},{"location":"features/fitbit-heartrate-summary/#fitbit-heart-rate-summary","text":"Sensor parameters description for [FITBIT_HEARTRATE_SUMMARY] : Key Description [CONTAINER] Container where your heart rate summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Heart Rate Summary"},{"location":"features/fitbit-heartrate-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_heartrate_summary_raw.csv - data/raw/ { pid } /fitbit_heartrate_summary_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_summary_features/fitbit_heartrate_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_summary.csv Parameters description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate summary data, see table below Features description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxrestinghr beats/mins The maximum daily resting heart rate during a time segment. minrestinghr beats/mins The minimum daily resting heart rate during a time segment. avgrestinghr beats/mins The average daily resting heart rate during a time segment. medianrestinghr beats/mins The median of daily resting heart rate during a time segment. moderestinghr beats/mins The mode of daily resting heart rate during a time segment. stdrestinghr beats/mins The standard deviation of daily resting heart rate during a time segment. 
diffmaxmoderestinghr beats/mins The difference between the maximum and mode daily resting heart rate during a time segment. diffminmoderestinghr beats/mins The difference between the mode and minimum daily resting heart rate during a time segment. entropyrestinghr nats Shannon\u2019s entropy measurement based on daily resting heart rate during a time segment. sumcaloriesZONE cals The total daily calories burned within heartrate_zone during a time segment. maxcaloriesZONE cals The maximum daily calories burned within heartrate_zone during a time segment. mincaloriesZONE cals The minimum daily calories burned within heartrate_zone during a time segment. avgcaloriesZONE cals The average daily calories burned within heartrate_zone during a time segment. mediancaloriesZONE cals The median of daily calories burned within heartrate_zone during a time segment. stdcaloriesZONE cals The standard deviation of daily calories burned within heartrate_zone during a time segment. entropycaloriesZONE nats Shannon\u2019s entropy measurement based on daily calories burned within heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed. Calories\u2019 accuracy depends on the users\u2019 Fitbit profile (weight, height, etc.).","title":"RAPIDS provider"},{"location":"features/fitbit-sleep-intraday/","text":"Fitbit Sleep Intraday \u00b6 Sensor parameters description for [FITBIT_SLEEP_INTRADAY] : Key Description [CONTAINER] Container where your sleep intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: main , nap . [INCLUDE_SLEEP_LATER_THAN] All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day. 
[REFERENCE_TIME] The reference point from which the [ROUTINE] features are to be computed. Chosen from MIDNIGHT and START_OF_THE_SEGMENT , default is MIDNIGHT . If you have multiple time segments per day it might be more informative to set this flag to START_OF_THE_SEGMENT . Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES] : Feature Units Description countepisode [LEVEL][TYPE] episodes Number of [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. sumduration [LEVEL][TYPE] minutes Total duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. maxduration [LEVEL][TYPE] minutes Longest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. minduration [LEVEL][TYPE] minutes Shortest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. avgduration [LEVEL][TYPE] minutes Average duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). 
Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. medianduration [LEVEL][TYPE] minutes Median duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. stdduration [LEVEL][TYPE] minutes Standard deviation duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_LEVELS] : Feature Units Description ratiocount [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem ? (e.g., \\(countepisode[remstages][all] / countepisode[all][all]\\) ) ratioduration [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem ? (e.g., \\(sumduration[remstages][all] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_TYPES] : Feature Units Description ratiocountmain - Ratio between the count of all main episodes (independently of the levels inside) divided by the count of all main and nap episodes. This answers the question: what percentage of all sleep episodes ( main and nap ) were main ? 
We do not provide the ratio for nap because it is complementary. ( \\(countepisode[all][main] / countepisode[all][all]\\) ) ratiodurationmain - Ratio between the duration of all main episodes (independently of the levels inside) divided by the duration of all main and nap episodes. This answers the question: what percentage of all sleep time ( main and nap ) was main ? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[all][main] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_LEVELS] : Feature Units Description ratiocount [TYPE] within [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] during main sleep divided by the count of episodes of a single sleep [LEVEL] during main and nap . This answers the question: are rem episodes more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(countepisode[remstages][main] / countepisode[remstages][all]\\) ) ratioduration [TYPE] within [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] during main sleep divided by the duration of episodes of a single sleep [LEVEL] during main and nap . This answers the question: is rem time more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[remstages][main] / sumduration[remstages][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_TYPES] : Feature Units Description ratiocount [LEVEL] within [TYPE] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem during main / nap sleep time? 
(e.g., \\(countepisode[remstages][main] / countepisode[all][main]\\) ) ratioduration [LEVEL] within [TYPE] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem during main / nap sleep time? (e.g., \\(sumduration[remstages][main] / sumduration[all][main]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE] : Feature Units Description starttimefirstmainsleep minutes Start time (in minutes since REFERENCE_TIME ) of the first main sleep episode after INCLUDE_EPISODES_LATER_THAN . endtimelastmainsleep minutes End time (in minutes since REFERENCE_TIME ) of the last main sleep episode after INCLUDE_EPISODES_LATER_THAN . starttimefirstnap minutes Start time (in minutes since REFERENCE_TIME ) of the first nap episode after INCLUDE_EPISODES_LATER_THAN . endtimelastnap minutes End time (in minutes since REFERENCE_TIME ) of the last nap episode after INCLUDE_EPISODES_LATER_THAN . Assumptions/Observations Deleting values from [SLEEP_LEVELS] or [SLEEP_TYPES] will only change the features you receive from [LEVELS_AND_TYPES] . For example if STAGES only contains [rem, light] you will not receive countepisode[wake|deep][TYPE] or sum, max, min, avg, median, or std duration . These values will not influence RATIOS or ROUTINE features. Any [LEVEL] grouping is done within the elements of each class CLASSIC , STAGES , and UNIFIED . That is, we never combine CLASSIC or STAGES types to compute features when LEVELS_AND_TYPES_COMBINING_ALL is True or when computing RATIOS . 
PRICE provider \u00b6 Available time segments Available for any time segments larger or equal to one day File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_parsed.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the PRICE provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [DAY_TYPE] The features of this provider can be computed using daily averages/standard deviations that were extracted on WEEKEND days only, WEEK days only, or ALL days [GROUP_EPISODES_WITHIN] This parameter contains 2 values: [START_TIME] and [LENGTH] . Only main sleep episodes that intersect or contain the period between [ START_TIME , START_TIME + LENGTH ] are taken into account to compute the features described below. Both [START_TIME] and [LENGTH] are in minutes. 
[START_TIME] is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. [LENGTH] is a number smaller than 1440 (24 hours). Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Feature Units Description avgduration [LEVEL] main [DAY_TYPE] minutes Average duration of daily LEVEL sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgratioduration [LEVEL] withinmain [DAY_TYPE] - Average ratio between daily LEVEL time and in-bed time inferred from main sleep episodes. LEVEL is one of SLEEP_LEVELS (e.g. awake-classic or rem-stages). In-bed time is the total duration of all main sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgstarttimeofepisodemain [DAY_TYPE] minutes Average start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgendtimeofepisodemain [DAY_TYPE] minutes Average end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgmidpointofepisodemain [DAY_TYPE] minutes Average mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdstarttimeofepisodemain [DAY_TYPE] minutes Standard deviation of start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. 
stdendtimeofepisodemain [DAY_TYPE] minutes Standard deviation of end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdmidpointofepisodemain [DAY_TYPE] minutes Standard deviation of mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. socialjetlag minutes Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays. meanssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive start times. meanssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive end times. meanssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive mid times. medianssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive start times. medianssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive end times. medianssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive mid times. Assumptions/Observations These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). 
This is the reason why they are only available on time segments that are at least 24 hours long (we need at least 1 day to get the average). Even though Fitbit provides 2 types of sleep episodes ( main and nap ), only main sleep episodes are considered. How do we assign sleep episodes to specific dates? START_TIME and LENGTH control the dates that sleep episodes belong to. For a pair of [START_TIME] and [LENGTH] , sleep episodes (blue boxes) can only be placed at the following places: Relationship between sleep episodes and the given times ([START_TIME], [LENGTH]) If the end time of a sleep episode is before [START_TIME] , it will belong to the day before its start date (e.g. sleep episode #1). If (1) the start time or the end time of a sleep episode is between (overlap) [START_TIME] and [START_TIME] + [LENGTH] or (2) the start time is before [START_TIME] and the end time is after [START_TIME] + [LENGTH] , it will belong to its start date (e.g. sleep episode #2, #3, #4, #5). If the start time of a sleep episode is after [START_TIME] + [LENGTH] , it will belong to the day after its start date (e.g. sleep episode #6). Only main sleep episodes that intersect or contain the period between [START_TIME] and [START_TIME] + [LENGTH] will be included in the feature computation. If we process the following main sleep episodes: episode start end 1 2021-02-01 12:00 2021-02-01 15:00 2 2021-02-01 21:00 2021-02-02 03:00 3 2021-02-02 05:00 2021-02-02 08:00 4 2021-02-02 11:00 2021-02-02 14:00 5 2021-02-02 19:00 2021-02-03 06:00 And our parameters: [GROUP_EPISODES_WITHIN][START_TIME] = 1320 (today\u2019s 22:00) [GROUP_EPISODES_WITHIN][LENGTH] = 720 (tomorrow\u2019s 10:00, or 22:00 + 12 hours) Only sleep episodes 2, 3, and 5 would be considered. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day\u2019s midnight. 
All main sleep episodes are chunked within the requested time segments which need to be at least 24 hours long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example: The daily features extracted on 2021-02-01 will be: starttimeofepisodemain (bedtime) is 21 * 60 (episode 2 start time 2021-02-01 21:00) endtimeofepisodemain (wake time) is 32 * 60 (episode 3 end time 2021-02-02 08:00 + 24) midpointofepisodemain (midpoint sleep) is [(21 * 60) + (32 * 60)] / 2 The daily features extracted on 2021-02-02 will be: starttimeofepisodemain (bedtime) is 19 * 60 (episode 5 start time 2021-02-02 19:00) endtimeofepisodemain (wake time) is 30 * 60 (episode 5 end time 2021-02-03 06:00 + 24) midpointofepisodemain (midpoint sleep) is [(19 * 60) + (30 * 60)] / 2 And avgstarttimeofepisodemain[DAY_TYPE] will be ([21 * 60] + [19 * 60]) / 2","title":"Fitbit Sleep Intraday"},{"location":"features/fitbit-sleep-intraday/#fitbit-sleep-intraday","text":"Sensor parameters description for [FITBIT_SLEEP_INTRADAY] : Key Description [CONTAINER] Container where your sleep intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Sleep Intraday"},{"location":"features/fitbit-sleep-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] : 
Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: main , nap . [INCLUDE_SLEEP_LATER_THAN] All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day. [REFERENCE_TIME] The reference point from which the [ROUTINE] features are to be computed. Chosen from MIDNIGHT and START_OF_THE_SEGMENT , default is MIDNIGHT . If you have multiple time segments per day it might be more informative to set this flag to START_OF_THE_SEGMENT . Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES] : Feature Units Description countepisode [LEVEL][TYPE] episodes Number of [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). 
Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. sumduration [LEVEL][TYPE] minutes Total duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. maxduration [LEVEL][TYPE] minutes Longest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. minduration [LEVEL][TYPE] minutes Shortest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. avgduration [LEVEL][TYPE] minutes Average duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. medianduration [LEVEL][TYPE] minutes Median duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. stdduration [LEVEL][TYPE] minutes Standard deviation duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. 
awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_LEVELS] : Feature Units Description ratiocount [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem ? (e.g., \\(countepisode[remstages][all] / countepisode[all][all]\\) ) ratioduration [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem ? (e.g., \\(sumduration[remstages][all] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_TYPES] : Feature Units Description ratiocountmain - Ratio between the count of all main episodes (independently of the levels inside) divided by the count of all main and nap episodes. This answers the question: what percentage of all sleep episodes ( main and nap ) were main ? We do not provide the ratio for nap because is complementary. ( \\(countepisode[all][main] / countepisode[all][all]\\) ) ratiodurationmain - Ratio between the duration of all main episodes (independently of the levels inside) divided by the duration of all main and nap episodes. This answers the question: what percentage of all sleep time ( main and nap ) was main ? We do not provide the ratio for nap because is complementary. 
( \\(sumduration[all][main] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_LEVELS] : Feature Units Description ratiocount [TYPE] within [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] during main sleep divided by the count of episodes of a single sleep [LEVEL] during main and nap . This answers the question: are rem episodes more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(countepisode[remstages][main] / countepisode[remstages][all]\\) ) ratioduration [TYPE] within [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] during main sleep divided by the duration of episodes of a single sleep [LEVEL] during main and nap . This answers the question: is rem time more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[remstages][main] / sumduration[remstages][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_TYPES] : Feature Units Description ratiocount [LEVEL] within [TYPE] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem during main / nap sleep time? (e.g., \\(countepisode[remstages][main] / countepisode[all][main]\\) ) ratioduration [LEVEL] within [TYPE] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem during main / nap sleep time? 
(e.g., \\(sumduration[remstages][main] / sumduration[all][main]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE] : Feature Units Description starttimefirstmainsleep minutes Start time (in minutes since REFERENCE_TIME ) of the first main sleep episode after INCLUDE_EPISODES_LATER_THAN . endtimelastmainsleep minutes End time (in minutes since REFERENCE_TIME ) of the last main sleep episode after INCLUDE_EPISODES_LATER_THAN . starttimefirstnap minutes Start time (in minutes since REFERENCE_TIME ) of the first nap episode after INCLUDE_EPISODES_LATER_THAN . endtimelastnap minutes End time (in minutes since REFERENCE_TIME ) of the last nap episode after INCLUDE_EPISODES_LATER_THAN . Assumptions/Observations Deleting values from [SLEEP_LEVELS] or [SLEEP_TYPES] will only change the features you receive from [LEVELS_AND_TYPES] . For example if STAGES only contains [rem, light] you will not receive countepisode[wake|deep][TYPE] or sum, max, min, avg, median, or std duration . These values will not influence RATIOS or ROUTINE features. Any [LEVEL] grouping is done within the elements of each class CLASSIC , STAGES , and UNIFIED . 
That is, we never combine CLASSIC or STAGES types to compute features when LEVELS_AND_TYPES_COMBINING_ALL is True or when computing RATIOS .","title":"RAPIDS provider"},{"location":"features/fitbit-sleep-intraday/#price-provider","text":"Available time segments Available for any time segments larger or equal to one day File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_parsed.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the PRICE provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [DAY_TYPE] The features of this provider can be computed using daily averages/standard deviations that were extracted on WEEKEND days only, WEEK days only, or ALL days [GROUP_EPISODES_WITHIN] This parameter contains 2 values: [START_TIME] and [LENGTH] . 
Only main sleep episodes that intersect or contain the period between [ START_TIME , START_TIME + LENGTH ] are taken into account to compute the features described below. Both [START_TIME] and [LENGTH] are in minutes. [START_TIME] is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. [LENGTH] is a number smaller than 1440 (24 hours). Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Feature Units Description avgduration [LEVEL] main [DAY_TYPE] minutes Average duration of daily LEVEL sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgratioduration [LEVEL] withinmain [DAY_TYPE] - Average ratio between daily LEVEL time and in-bed time inferred from main sleep episodes. LEVEL is one of SLEEP_LEVELS (e.g. awake-classic or rem-stages). In-bed time is the total duration of all main sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgstarttimeofepisodemain [DAY_TYPE] minutes Average start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgendtimeofepisodemain [DAY_TYPE] minutes Average end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgmidpointofepisodemain [DAY_TYPE] minutes Average mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. 
stdstarttimeofepisodemain [DAY_TYPE] minutes Standard deviation of start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdendtimeofepisodemain [DAY_TYPE] minutes Standard deviation of end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdmidpointofepisodemain [DAY_TYPE] minutes Standard deviation of mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. socialjetlag minutes Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays. meanssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive start times. meanssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive end times. meanssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive mid times. medianssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive start times. medianssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive end times. 
medianssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive mid times. Assumptions/Observations These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). This is the reason why they are only available on time segments that are at least 24 hours long (we need at least 1 day to get the average). Even though Fitbit provides 2 types of sleep episodes ( main and nap ), only main sleep episodes are considered. How do we assign sleep episodes to specific dates? START_TIME and LENGTH control the dates that sleep episodes belong to. For a pair of [START_TIME] and [LENGTH] , sleep episodes (blue boxes) can only be placed at the following places: Relationship between sleep episodes and the given times ([START_TIME], [LENGTH]) If the end time of a sleep episode is before [START_TIME] , it will belong to the day before its start date (e.g. sleep episode #1). If (1) the start time or the end time of a sleep episode is between (overlap) [START_TIME] and [START_TIME] + [LENGTH] or (2) the start time is before [START_TIME] and the end time is after [START_TIME] + [LENGTH] , it will belong to its start date (e.g. sleep episode #2, #3, #4, #5). If the start time of a sleep episode is after [START_TIME] + [LENGTH] , it will belong to the day after its start date (e.g. sleep episode #6). Only main sleep episodes that intersect or contain the period between [START_TIME] and [START_TIME] + [LENGTH] will be included in the feature computation. 
If we process the following main sleep episodes: episode start end 1 2021-02-01 12:00 2021-02-01 15:00 2 2021-02-01 21:00 2021-02-02 03:00 3 2021-02-02 05:00 2021-02-02 08:00 4 2021-02-02 11:00 2021-02-02 14:00 5 2021-02-02 19:00 2021-02-03 06:00 And our parameters: [GROUP_EPISODES_WITHIN][START_TIME] = 1320 (today\u2019s 22:00) [GROUP_EPISODES_WITHIN][LENGTH] = 720 (tomorrow\u2019s 10:00, or 22:00 + 12 hours) Only sleep episodes 2, 3, and 5 would be considered. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day\u2019s midnight. All main sleep episodes are chunked within the requested time segments which need to be at least 24 hours long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example: The daily features extracted on 2021-02-01 will be: starttimeofepisodemain (bedtime) is 21 * 60 (episode 2 start time 2021-02-01 21:00) endtimeofepisodemain (wake time) is 32 * 60 (episode 3 end time 2021-02-02 08:00 + 24) midpointofepisodemain (midpoint sleep) is [(21 * 60) + (32 * 60)] / 2 The daily features extracted on 2021-02-02 will be: starttimeofepisodemain (bedtime) is 19 * 60 (episode 5 start time 2021-02-02 19:00) endtimeofepisodemain (wake time) is 30 * 60 (episode 5 end time 2021-02-03 06:00 + 24) midpointofepisodemain (midpoint sleep) is [(19 * 60) + (30 * 60)] / 2 And avgstarttimeofepisodemain[DAY_TYPE] will be ([21 * 60] + [19 * 60]) / 2","title":"PRICE provider"},{"location":"features/fitbit-sleep-summary/","text":"Fitbit Sleep Summary \u00b6 Sensor parameters description for [FITBIT_SLEEP_SUMMARY] : Key Description [CONTAINER] Container where your sleep summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. 
Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_sleep_summary_raw.csv - data/raw/ { pid } /fitbit_sleep_summary_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_summary_features/fitbit_sleep_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_summary.csv Parameters description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_SUMMARY features from the RAPIDS provider [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: main , nap , all . [FEATURES] Features to be computed from sleep summary data, see table below Features description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description countepisodeTYPE episodes Number of sleep episodes for a certain sleep type during a time segment. avgefficiencyTYPE scores Average sleep efficiency for a certain sleep type during a time segment. sumdurationafterwakeupTYPE minutes Total duration the user stayed in bed after waking up for a certain sleep type during a time segment. sumdurationasleepTYPE minutes Total sleep duration for a certain sleep type during a time segment. sumdurationawakeTYPE minutes Total duration the user stayed awake but still in bed for a certain sleep type during a time segment. sumdurationtofallasleepTYPE minutes Total duration the user spent to fall asleep for a certain sleep type during a time segment. sumdurationinbedTYPE minutes Total duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. avgdurationafterwakeupTYPE minutes Average duration the user stayed in bed after waking up for a certain sleep type during a time segment. avgdurationasleepTYPE minutes Average sleep duration for a certain sleep type during a time segment. 
avgdurationawakeTYPE minutes Average duration the user stayed awake but still in bed for a certain sleep type during a time segment. avgdurationtofallasleepTYPE minutes Average duration the user spent to fall asleep for a certain sleep type during a time segment. avgdurationinbedTYPE minutes Average duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. Assumptions/Observations There are three sleep types (TYPE): main , nap , all . The all type contains both main sleep and naps. There are two versions of Fitbit\u2019s sleep API ( version 1 and version 1.2 ), and each provides raw sleep data in a different format: Count & duration summaries . v1 contains count_awake , duration_awake , count_awakenings , count_restless , and duration_restless fields for every sleep record but v1.2 does not. API columns . Features are computed based on the values provided by Fitbit\u2019s API: efficiency , minutes_after_wakeup , minutes_asleep , minutes_awake , minutes_to_fall_asleep , minutes_in_bed , is_main_sleep and type .","title":"Fitbit Sleep Summary"},{"location":"features/fitbit-sleep-summary/#fitbit-sleep-summary","text":"Sensor parameters description for [FITBIT_SLEEP_SUMMARY] : Key Description [CONTAINER] Container where your sleep summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Sleep Summary"},{"location":"features/fitbit-sleep-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. 
Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_sleep_summary_raw.csv - data/raw/ { pid } /fitbit_sleep_summary_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_summary_features/fitbit_sleep_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_summary.csv Parameters description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_SUMMARY features from the RAPIDS provider [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: main , nap , all . [FEATURES] Features to be computed from sleep summary data, see table below Features description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description countepisodeTYPE episodes Number of sleep episodes for a certain sleep type during a time segment. avgefficiencyTYPE scores Average sleep efficiency for a certain sleep type during a time segment. sumdurationafterwakeupTYPE minutes Total duration the user stayed in bed after waking up for a certain sleep type during a time segment. sumdurationasleepTYPE minutes Total sleep duration for a certain sleep type during a time segment. sumdurationawakeTYPE minutes Total duration the user stayed awake but still in bed for a certain sleep type during a time segment. sumdurationtofallasleepTYPE minutes Total duration the user spent to fall asleep for a certain sleep type during a time segment. sumdurationinbedTYPE minutes Total duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. avgdurationafterwakeupTYPE minutes Average duration the user stayed in bed after waking up for a certain sleep type during a time segment. avgdurationasleepTYPE minutes Average sleep duration for a certain sleep type during a time segment. 
avgdurationawakeTYPE minutes Average duration the user stayed awake but still in bed for a certain sleep type during a time segment. avgdurationtofallasleepTYPE minutes Average duration the user spent to fall asleep for a certain sleep type during a time segment. avgdurationinbedTYPE minutes Average duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. Assumptions/Observations There are three sleep types (TYPE): main , nap , all . The all type contains both main sleep and naps. There are two versions of Fitbit\u2019s sleep API ( version 1 and version 1.2 ), and each provides raw sleep data in a different format: Count & duration summaries . v1 contains count_awake , duration_awake , count_awakenings , count_restless , and duration_restless fields for every sleep record but v1.2 does not. API columns . Features are computed based on the values provided by Fitbit\u2019s API: efficiency , minutes_after_wakeup , minutes_asleep , minutes_awake , minutes_to_fall_asleep , minutes_in_bed , is_main_sleep and type .","title":"RAPIDS provider"},{"location":"features/fitbit-steps-intraday/","text":"Fitbit Steps Intraday \u00b6 Sensor parameters description for [FITBIT_STEPS_INTRADAY] : Key Description [CONTAINER] Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_steps_intraday_raw.csv - data/raw/ { pid } /fitbit_steps_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_steps_intraday_features/fitbit_steps_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_intraday.csv Parameters description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from steps intraday data, see table below [THRESHOLD_ACTIVE_BOUT] Every minute with Fitbit steps data will be labelled as sedentary if its step count is below this threshold, otherwise, active . [INCLUDE_ZERO_STEP_ROWS] Whether or not to include time segments with a 0 step count during the whole day. Features description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description sumsteps steps The total step count during a time segment. maxsteps steps The maximum step count during a time segment. minsteps steps The minimum step count during a time segment. avgsteps steps The average step count during a time segment. stdsteps steps The standard deviation of step count during a time segment. countepisodesedentarybout bouts Number of sedentary bouts during a time segment. sumdurationsedentarybout minutes Total duration of all sedentary bouts during a time segment. maxdurationsedentarybout minutes The maximum duration of any sedentary bout during a time segment. mindurationsedentarybout minutes The minimum duration of any sedentary bout during a time segment. avgdurationsedentarybout minutes The average duration of sedentary bouts during a time segment. stddurationsedentarybout minutes The standard deviation of the duration of sedentary bouts during a time segment. countepisodeactivebout bouts Number of active bouts during a time segment. 
sumdurationactivebout minutes Total duration of all active bouts during a time segment. maxdurationactivebout minutes The maximum duration of any active bout during a time segment. mindurationactivebout minutes The minimum duration of any active bout during a time segment. avgdurationactivebout minutes The average duration of active bouts during a time segment. stddurationactivebout minutes The standard deviation of the duration of active bouts during a time segment. Assumptions/Observations Active and sedentary bouts . If the step count per minute is smaller than THRESHOLD_ACTIVE_BOUT (default value is 10), that minute is labelled as sedentary, otherwise, it is labelled as active. Active and sedentary bouts are periods of consecutive minutes labelled as active or sedentary .","title":"Fitbit Steps Intraday"},{"location":"features/fitbit-steps-intraday/#fitbit-steps-intraday","text":"Sensor parameters description for [FITBIT_STEPS_INTRADAY] : Key Description [CONTAINER] Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Steps Intraday"},{"location":"features/fitbit-steps-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_steps_intraday_raw.csv - data/raw/ { pid } /fitbit_steps_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_steps_intraday_features/fitbit_steps_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_intraday.csv Parameters description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from steps intraday data, see table below [THRESHOLD_ACTIVE_BOUT] Every minute with Fitbit steps data will be labelled as sedentary if its step count is below this threshold, otherwise, active . 
[INCLUDE_ZERO_STEP_ROWS] Whether or not to include time segments with a 0 step count during the whole day. Features description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description sumsteps steps The total step count during a time segment. maxsteps steps The maximum step count during a time segment. minsteps steps The minimum step count during a time segment. avgsteps steps The average step count during a time segment. stdsteps steps The standard deviation of step count during a time segment. countepisodesedentarybout bouts Number of sedentary bouts during a time segment. sumdurationsedentarybout minutes Total duration of all sedentary bouts during a time segment. maxdurationsedentarybout minutes The maximum duration of any sedentary bout during a time segment. mindurationsedentarybout minutes The minimum duration of any sedentary bout during a time segment. avgdurationsedentarybout minutes The average duration of sedentary bouts during a time segment. stddurationsedentarybout minutes The standard deviation of the duration of sedentary bouts during a time segment. countepisodeactivebout bouts Number of active bouts during a time segment. sumdurationactivebout minutes Total duration of all active bouts during a time segment. maxdurationactivebout minutes The maximum duration of any active bout during a time segment. mindurationactivebout minutes The minimum duration of any active bout during a time segment. avgdurationactivebout minutes The average duration of active bouts during a time segment. stddurationactivebout minutes The standard deviation of the duration of active bouts during a time segment. Assumptions/Observations Active and sedentary bouts . If the step count per minute is smaller than THRESHOLD_ACTIVE_BOUT (default value is 10), that minute is labelled as sedentary, otherwise, it is labelled as active. 
Active and sedentary bouts are periods of consecutive minutes labelled as active or sedentary .","title":"RAPIDS provider"},{"location":"features/fitbit-steps-summary/","text":"Fitbit Steps Summary \u00b6 Sensor parameters description for [FITBIT_STEPS_SUMMARY] : Key Description [CONTAINER] Container where your steps summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_steps_summary_raw.csv - data/raw/ { pid } /fitbit_steps_summary_with_datetime.csv - data/interim/ { pid } /fitbit_steps_summary_features/fitbit_steps_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_summary.csv Parameters description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from steps summary data, see table below Features description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxsumsteps steps The maximum daily step count during a time segment. minsumsteps steps The minimum daily step count during a time segment. avgsumsteps steps The average daily step count during a time segment. mediansumsteps steps The median of daily step count during a time segment. stdsumsteps steps The standard deviation of daily step count during a time segment. 
Assumptions/Observations NA","title":"Fitbit Steps Summary"},{"location":"features/fitbit-steps-summary/#fitbit-steps-summary","text":"Sensor parameters description for [FITBIT_STEPS_SUMMARY] : Key Description [CONTAINER] Container where your steps summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Steps Summary"},{"location":"features/fitbit-steps-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_steps_summary_raw.csv - data/raw/ { pid } /fitbit_steps_summary_with_datetime.csv - data/interim/ { pid } /fitbit_steps_summary_features/fitbit_steps_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_summary.csv Parameters description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from steps summary data, see table below Features description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxsumsteps steps The maximum daily step count during a time segment. minsumsteps steps The minimum daily step count during a time segment. avgsumsteps steps The average daily step count during a time segment. mediansumsteps steps The median of daily step count during a time segment. stdsumsteps steps The standard deviation of daily step count during a time segment. Assumptions/Observations NA","title":"RAPIDS provider"},{"location":"features/phone-accelerometer/","text":"Phone Accelerometer \u00b6 Sensor parameters description for [PHONE_ACCELEROMETER] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the accelerometer data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of the magnitude of acceleration. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. PANDA provider \u00b6 These features are based on the work by Panda et al . 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the PANDA provider [FEATURES] Features to be computed for exertional and non-exertional activity episodes, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Feature Units Description sumduration minutes Total duration of all exertional or non-exertional activity episodes. maxduration minutes Longest duration of any exertional or non-exertional activity episode. minduration minutes Shortest duration of any exertional or non-exertional activity episode. avgduration minutes Average duration of any exertional or non-exertional activity episode. medianduration minutes Median duration of any exertional or non-exertional activity episode. stdduration minutes Standard deviation of the duration of all exertional or non-exertional activity episodes. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. See Panda et al for a definition of exertional and non-exertional activity episodes","title":"Phone Accelerometer"},{"location":"features/phone-accelerometer/#phone-accelerometer","text":"Sensor parameters description for [PHONE_ACCELEROMETER] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the accelerometer data is stored","title":"Phone Accelerometer"},{"location":"features/phone-accelerometer/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of the magnitude of acceleration. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem.","title":"RAPIDS provider"},{"location":"features/phone-accelerometer/#panda-provider","text":"These features are based on the work by Panda et al . 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the PANDA provider [FEATURES] Features to be computed for exertional and non-exertional activity episodes, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Feature Units Description sumduration minutes Total duration of all exertional or non-exertional activity episodes. maxduration minutes Longest duration of any exertional or non-exertional activity episode. minduration minutes Shortest duration of any exertional or non-exertional activity episode. avgduration minutes Average duration of any exertional or non-exertional activity episode. medianduration minutes Median duration of any exertional or non-exertional activity episode. stdduration minutes Standard deviation of the duration of all exertional or non-exertional activity episodes. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. 
See Panda et al for a definition of exertional and non-exertional activity episodes","title":"PANDA provider"},{"location":"features/phone-activity-recognition/","text":"Phone Activity Recognition \u00b6 Sensor parameters description for [PHONE_ACTIVITY_RECOGNITION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) 
where the activity data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where the activity data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same activity episode RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_activity_recognition_raw.csv - data/raw/ { pid } /phone_activity_recognition_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_episodes.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_features/phone_activity_recognition_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_activity_recognition.csv Parameters description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACTIVITY_RECOGNITION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [ACTIVITY_CLASSES][STATIONARY] An array of the activity labels to be considered in the STATIONARY category choose any of still , tilting [ACTIVITY_CLASSES][MOBILE] An array of the activity labels to be considered in the MOBILE category choose any of on_foot , walking , running , on_bicycle [ACTIVITY_CLASSES][VEHICLE] An array of the activity labels to be considered in the VEHICLE category choose any of in_vehicle Features description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of episodes. 
mostcommonactivity activity type The most common activity type (e.g. still , on_foot , etc.). If there is a tie, the first one is chosen. countuniqueactivities activity type Number of unique activities. durationstationary minutes The total duration of [ACTIVITY_CLASSES][STATIONARY] episodes durationmobile minutes The total duration of [ACTIVITY_CLASSES][MOBILE] episodes of on foot, running, and on bicycle activities durationvehicle minutes The total duration of [ACTIVITY_CLASSES][VEHICLE] episodes of on vehicle activity Assumptions/Observations iOS Activity Recognition names and types are unified with Android labels: iOS Activity Name Android Activity Name Android Activity Type walking walking 7 running running 8 cycling on_bicycle 1 automotive in_vehicle 0 stationary still 3 unknown unknown 4 In AWARE, Activity Recognition data for Android and iOS are stored in two different database tables, RAPIDS automatically infers what platform each participant belongs to based on their participant file .","title":"Phone Activity Recognition"},{"location":"features/phone-activity-recognition/#phone-activity-recognition","text":"Sensor parameters description for [PHONE_ACTIVITY_RECOGNITION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the activity data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where the activity data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same activity episode","title":"Phone Activity Recognition"},{"location":"features/phone-activity-recognition/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_activity_recognition_raw.csv - data/raw/ { pid } /phone_activity_recognition_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_episodes.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_features/phone_activity_recognition_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_activity_recognition.csv Parameters description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACTIVITY_RECOGNITION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [ACTIVITY_CLASSES][STATIONARY] An array of the activity labels to be considered in the STATIONARY category choose any of still , tilting [ACTIVITY_CLASSES][MOBILE] An array of the activity labels to be considered in the MOBILE category choose any of on_foot , walking , running , on_bicycle [ACTIVITY_CLASSES][VEHICLE] An array of the activity labels to be considered in the VEHICLE category choose any of in_vehicule Features description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of episodes. mostcommonactivity activity type The most common activity type (e.g. still , on_foot , etc.). If there is a tie, the first one is chosen. 
countuniqueactivities activity type Number of unique activities. durationstationary minutes The total duration of [ACTIVITY_CLASSES][STATIONARY] episodes durationmobile minutes The total duration of [ACTIVITY_CLASSES][MOBILE] episodes of on foot, running, and on bicycle activities durationvehicle minutes The total duration of [ACTIVITY_CLASSES][VEHICLE] episodes of on vehicle activity Assumptions/Observations iOS Activity Recognition names and types are unified with Android labels: iOS Activity Name Android Activity Name Android Activity Type walking walking 7 running running 8 cycling on_bicycle 1 automotive in_vehicle 0 stationary still 3 unknown unknown 4 In AWARE, Activity Recognition data for Android and iOS are stored in two different database tables, RAPIDS automatically infers what platform each participant belongs to based on their participant file .","title":"RAPIDS provider"},{"location":"features/phone-applications-crashes/","text":"Phone Applications Crashes \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_CRASHES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications crashes data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scrapped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . 
If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_CRASHES ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Crashes"},{"location":"features/phone-applications-crashes/#phone-applications-crashes","text":"Sensor parameters description for [PHONE_APPLICATIONS_CRASHES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications crashes data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If GOOGLE , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . 
If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_CRASHES ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Crashes"},{"location":"features/phone-applications-foreground/","text":"Phone Applications Foreground \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_FOREGROUND] (these parameters are used by the only provider available at the moment, RAPIDS): Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications foreground data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scrapped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) RAPIDS provider \u00b6 The app category (genre) catalogue used in these features was originally created by Stachl et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_applications_foreground_raw.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime_with_categories.csv - data/interim/ { pid } /phone_applications_foreground_features/phone_applications_foreground_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_applications_foreground.csv Parameters description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_APPLICATIONS_FOREGROUND features from the RAPIDS provider [FEATURES] Features to be computed, see table below [SINGLE_CATEGORIES] An array of app categories to be included in the feature extraction computation. The special keyword all represents a category with all the apps from each participant. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [MULTIPLE_CATEGORIES] An array of collections representing meta-categories (a group of categories). They key of each element is the name of the meta-category and the value is an array of member app categories. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [SINGLE_APPS] An array of apps to be included in the feature extraction computation. Use their package name (e.g. com.google.android.youtube ) or the reserved keyword top1global (the most used app by a participant over the whole monitoring study) [EXCLUDED_CATEGORIES] An array of app categories to be excluded from the feature extraction computation. 
By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [EXCLUDED_APPS] An array of apps to be excluded from the feature extraction computation. Use their package name, for example: com.google.android.youtube Features description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Feature Units Description count apps Number of times a single app or apps within a category were used (i.e. they were brought to the foreground either by tapping their icon or switching to it from another app) timeoffirstuse minutes The time in minutes between 12:00am (midnight) and the first use of a single app or apps within a category during a time_segment timeoflastuse minutes The time in minutes between 12:00am (midnight) and the last use of a single app or apps within a category during a time_segment frequencyentropy nats The entropy of the used apps within a category during a time_segment (each app is seen as a unique event, the more apps were used, the higher the entropy). This is especially relevant when computed over all apps. Entropy cannot be obtained for a single app Assumptions/Observations Features can be computed by app, by apps grouped under a single category (genre) and by multiple categories grouped together (meta-categories). For example, we can get features for Facebook (single app), for Social Network apps (a category including Facebook and other social media apps) or for Social (a meta-category formed by Social Network and Social Media Tools categories). Apps installed by default like YouTube are considered systems apps on some phones. We do an exact match to exclude apps where \u201cgenre\u201d == EXCLUDED_CATEGORIES or \u201cpackage_name\u201d == EXCLUDED_APPS . We provide three ways of classifying and app within a category (genre): a) by automatically scraping its official category from the Google Play Store, b) by using the catalogue created by Stachl et al. 
which we provide in RAPIDS ( data/external/stachl_application_genre_catalogue.csv ), or c) by manually creating a personalized catalogue. You can choose a, b or c by modifying [APPLICATION_GENRES] keys and values (see the Sensor parameters description table above).","title":"Phone Applications Foreground"},{"location":"features/phone-applications-foreground/#phone-applications-foreground","text":"Sensor parameters description for [PHONE_APPLICATIONS_FOREGROUND] (these parameters are used by the only provider available at the moment, RAPIDS): Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications foreground data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scrapped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored)","title":"Phone Applications Foreground"},{"location":"features/phone-applications-foreground/#rapids-provider","text":"The app category (genre) catalogue used in these features was originally created by Stachl et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_applications_foreground_raw.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime_with_categories.csv - data/interim/ { pid } /phone_applications_foreground_features/phone_applications_foreground_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_applications_foreground.csv Parameters description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_APPLICATIONS_FOREGROUND features from the RAPIDS provider [FEATURES] Features to be computed, see table below [SINGLE_CATEGORIES] An array of app categories to be included in the feature extraction computation. The special keyword all represents a category with all the apps from each participant. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [MULTIPLE_CATEGORIES] An array of collections representing meta-categories (a group of categories). They key of each element is the name of the meta-category and the value is an array of member app categories. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [SINGLE_APPS] An array of apps to be included in the feature extraction computation. Use their package name (e.g. com.google.android.youtube ) or the reserved keyword top1global (the most used app by a participant over the whole monitoring study) [EXCLUDED_CATEGORIES] An array of app categories to be excluded from the feature extraction computation. 
By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [EXCLUDED_APPS] An array of apps to be excluded from the feature extraction computation. Use their package name, for example: com.google.android.youtube Features description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Feature Units Description count apps Number of times a single app or apps within a category were used (i.e. they were brought to the foreground either by tapping their icon or switching to it from another app) timeoffirstuse minutes The time in minutes between 12:00am (midnight) and the first use of a single app or apps within a category during a time_segment timeoflastuse minutes The time in minutes between 12:00am (midnight) and the last use of a single app or apps within a category during a time_segment frequencyentropy nats The entropy of the used apps within a category during a time_segment (each app is seen as a unique event, the more apps were used, the higher the entropy). This is especially relevant when computed over all apps. Entropy cannot be obtained for a single app Assumptions/Observations Features can be computed by app, by apps grouped under a single category (genre) and by multiple categories grouped together (meta-categories). For example, we can get features for Facebook (single app), for Social Network apps (a category including Facebook and other social media apps) or for Social (a meta-category formed by Social Network and Social Media Tools categories). Apps installed by default like YouTube are considered systems apps on some phones. We do an exact match to exclude apps where \u201cgenre\u201d == EXCLUDED_CATEGORIES or \u201cpackage_name\u201d == EXCLUDED_APPS . We provide three ways of classifying and app within a category (genre): a) by automatically scraping its official category from the Google Play Store, b) by using the catalogue created by Stachl et al. 
which we provide in RAPIDS ( data/external/stachl_application_genre_catalogue.csv ), or c) by manually creating a personalized catalogue. You can choose a, b or c by modifying [APPLICATION_GENRES] keys and values (see the Sensor parameters description table above).","title":"RAPIDS provider"},{"location":"features/phone-applications-notifications/","text":"Phone Applications Notifications \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_NOTIFICATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications notifications data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scrapped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_NOTIFICATIONS ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Notifications"},{"location":"features/phone-applications-notifications/#phone-applications-notifications","text":"Sensor parameters description for [PHONE_APPLICATIONS_NOTIFICATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the applications notifications data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If GOOGLE , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_NOTIFICATIONS ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Notifications"},{"location":"features/phone-battery/","text":"Phone Battery \u00b6 Sensor parameters description for [PHONE_BATTERY] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the battery data is stored [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same battery charge or discharge episode RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_battery_raw.csv - data/interim/ { pid } /phone_battery_episodes.csv - data/interim/ { pid } /phone_battery_episodes_resampled.csv - data/interim/ { pid } /phone_battery_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_battery_features/phone_battery_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_battery.csv Parameters description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BATTERY features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Feature Units Description countdischarge episodes Number of discharging episodes. sumdurationdischarge minutes The total duration of all discharging episodes. countcharge episodes Number of battery charging episodes. sumdurationcharge minutes The total duration of all charging episodes. avgconsumptionrate episodes/minutes The average of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration maxconsumptionrate episodes/minutes The highest of all episodes\u2019 consumption rates. 
An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration Assumptions/Observations We convert battery data collected with iOS client v1 (autodetected because battery status 4 does not exist) to match Android battery format: we swap status 3 for 5 and 1 for 3 We group battery data into discharge or charge episodes considering any contiguous rows with consecutive reductions or increases of the battery level if they are logged within [EPISODE_THRESHOLD_BETWEEN_ROWS] minutes from each other.","title":"Phone Battery"},{"location":"features/phone-battery/#phone-battery","text":"Sensor parameters description for [PHONE_BATTERY] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the battery data is stored [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same battery charge or discharge episode","title":"Phone Battery"},{"location":"features/phone-battery/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_battery_raw.csv - data/interim/ { pid } /phone_battery_episodes.csv - data/interim/ { pid } /phone_battery_episodes_resampled.csv - data/interim/ { pid } /phone_battery_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_battery_features/phone_battery_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_battery.csv Parameters description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BATTERY features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Feature Units Description countdischarge episodes Number of discharging episodes. sumdurationdischarge minutes The total duration of all discharging episodes. 
countcharge episodes Number of battery charging episodes. sumdurationcharge minutes The total duration of all charging episodes. avgconsumptionrate episodes/minutes The average of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration maxconsumptionrate episodes/minutes The highest of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration Assumptions/Observations We convert battery data collected with iOS client v1 (autodetected because battery status 4 do not exist) to match Android battery format: we swap status 3 for 5 and 1 for 3 We group battery data into discharge or charge episodes considering any contiguous rows with consecutive reductions or increases of the battery level if they are logged within [EPISODE_THRESHOLD_BETWEEN_ROWS] minutes from each other.","title":"RAPIDS provider"},{"location":"features/phone-bluetooth/","text":"Phone Bluetooth \u00b6 Sensor parameters description for [PHONE_BLUETOOTH] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the bluetooth data is stored RAPIDS provider \u00b6 Warning The features of this provider are deprecated in favor of DORYAB provider (see below). 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned devices during a time segment, a device can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique devices during a time segment as identified by their hardware ( bt_address ) address countscansmostuniquedevice scans Number of scans of the most sensed device within each time segment instance Assumptions/Observations From v0.2.0 countscans , uniquedevices , countscansmostuniquedevice were deprecated because they overlap with the respective features for ALL devices of the PHONE_BLUETOOTH DORYAB provider DORYAB provider \u00b6 This provider is adapted from the work by Doryab et al . Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the DORYAB provider [FEATURES] Features to be computed, see table below. 
These features are computed for three device categories: all devices, own devices and other devices. Features description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Feature Units Description countscans scans Number of scans (rows) from the devices sensed during a time segment instance. The more scans a bluetooth device has the longer it remained within range of the participant\u2019s phone uniquedevices devices Number of unique bluetooth devices sensed during a time segment instance as identified by their hardware addresses ( bt_address ) meanscans scans Mean of the scans of every sensed device within each time segment instance stdscans scans Standard deviation of the scans of every sensed device within each time segment instance countscans most frequentdevice within segments scans Number of scans of the most sensed device within each time segment instance countscans least frequentdevice within segments scans Number of scans of the least sensed device within each time segment instance countscans most frequentdevice across segments scans Number of scans of the most sensed device across time segment instances of the same type countscans least frequentdevice across segments scans Number of scans of the least sensed device across time segment instances of the same type per device countscans most frequentdevice acrossdataset scans Number of scans of the most sensed device across the entire dataset of every participant countscans least frequentdevice acrossdataset scans Number of scans of the least sensed device across the entire dataset of every participant Assumptions/Observations Devices are classified as belonging to the participant ( own ) or to other people ( others ) using k-means based on the number of times and the number of days each device was detected across each participant\u2019s dataset. See Doryab et al for more details. If ownership cannot be computed because all devices were detected on only one day, they are all considered as other . 
Thus all and other features will be equal. The likelihood of this scenario decreases the more days of data you have. The most and least frequent devices will be the same across time segment instances and across the entire dataset when every time segment instance covers every hour of a dataset. For example, daily segments (00:00 to 23:59) fall in this category but morning segments (06:00am to 11:59am) or periodic 30-minute segments don\u2019t. Example Simplified raw bluetooth data The following is a simplified example with bluetooth data from three days and two time segments: morning and afternoon. There are two own devices: 5C836F5-487E-405F-8E28-21DBD40FA4FF detected seven times across two days and 499A1EAF-DDF1-4657-986C-EA5032104448 detected eight times on a single day. local_date segment bt_address own_device 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-29 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 afternoon 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-30 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2017-05-07 morning 5C5A9C41-2F68-4CEB-96D0-77DE3729B729 0 2017-05-07 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2017-05-07 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 morning 6C444841-FE64-4375-BC3F-FA410CDC0AC7 0 2017-05-07 morning 4DC7A22D-9F1F-4DEF-8576-086910AABCB5 0 2017-05-07 afternoon 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 
2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 The most and least frequent OTHER devices ( own_device == 0 ) during morning segments The most and least frequent ALL | OWN | OTHER devices are computed within each time segment instance, across time segment instances of the same type and across the entire dataset of each person. These are the most and least frequent devices for OTHER devices during morning segments. most frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) least frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) most frequent device across 2016-11-30 morning: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent device across 2016-11-30 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) least frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent across morning segments: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent across morning segments: '6C444841-FE64-4375-BC3F-FA410CDC0AC7' (when tied, the first occurrence is chosen) most frequent across dataset: '499A1EAF-DDF1-4657-986C-EA5032104448' (only taking into account \"morning\" segments) least frequent across dataset: '4DC7A22D-9F1F-4DEF-8576-086910AABCB5' (when tied, the first occurrence is chosen) Bluetooth features for OTHER 
devices and morning segments For brevity we only show the following features for morning segments: OTHER : DEVICES : [ \"countscans\" , \"uniquedevices\" , \"meanscans\" , \"stdscans\" ] SCANS_MOST_FREQUENT_DEVICE : [ \"withinsegments\" , \"acrosssegments\" , \"acrossdataset\" ] Note that countscansmostfrequentdeviceacrossdatasetothers is all 0 s because 499A1EAF-DDF1-4657-986C-EA5032104448 is excluded from the count as it is labelled as an own device (not other ). local_segment countscansothers uniquedevicesothers meanscansothers stdscansothers countscansmostfrequentdevicewithinsegmentsothers countscansmostfrequentdeviceacrosssegmentsothers countscansmostfrequentdeviceacrossdatasetothers 2016-11-29-morning 1 1 1.000000 NaN 1 0.0 0.0 2016-11-30-morning 4 3 1.333333 0.57735 2 2.0 2.0 2017-05-07-morning 5 5 1.000000 0.00000 1 1.0 1.0","title":"Phone Bluetooth"},{"location":"features/phone-bluetooth/#phone-bluetooth","text":"Sensor parameters description for [PHONE_BLUETOOTH] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the bluetooth data is stored","title":"Phone Bluetooth"},{"location":"features/phone-bluetooth/#rapids-provider","text":"Warning The features of this provider are deprecated in favor of DORYAB provider (see below). 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned devices during a time segment, a device can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique devices during a time segment as identified by their hardware ( bt_address ) address countscansmostuniquedevice scans Number of scans of the most sensed device within each time segment instance Assumptions/Observations From v0.2.0 countscans , uniquedevices , countscansmostuniquedevice were deprecated because they overlap with the respective features for ALL devices of the PHONE_BLUETOOTH DORYAB provider","title":"RAPIDS provider"},{"location":"features/phone-bluetooth/#doryab-provider","text":"This provider is adapted from the work by Doryab et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the DORYAB provider [FEATURES] Features to be computed, see table below. These features are computed for three device categories: all devices, own devices and other devices. Features description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Feature Units Description countscans scans Number of scans (rows) from the devices sensed during a time segment instance. The more scans a bluetooth device has the longer it remained within range of the participant\u2019s phone uniquedevices devices Number of unique bluetooth devices sensed during a time segment instance as identified by their hardware addresses ( bt_address ) meanscans scans Mean of the scans of every sensed device within each time segment instance stdscans scans Standard deviation of the scans of every sensed device within each time segment instance countscans most frequentdevice within segments scans Number of scans of the most sensed device within each time segment instance countscans least frequentdevice within segments scans Number of scans of the least sensed device within each time segment instance countscans most frequentdevice across segments scans Number of scans of the most sensed device across time segment instances of the same type countscans least frequentdevice across segments scans Number of scans of the least sensed device across time segment instances of the same type per device countscans most frequentdevice acrossdataset scans Number of scans of the most sensed device across the entire 
dataset of every participant countscans least frequentdevice acrossdataset scans Number of scans of the least sensed device across the entire dataset of every participant Assumptions/Observations Devices are classified as belonging to the participant ( own ) or to other people ( others ) using k-means based on the number of times and the number of days each device was detected across each participant\u2019s dataset. See Doryab et al for more details. If ownership cannot be computed because all devices were detected on only one day, they are all considered as other . Thus all and other features will be equal. The likelihood of this scenario decreases the more days of data you have. The most and least frequent devices will be the same across time segment instances and across the entire dataset when every time segment instance covers every hour of a dataset. For example, daily segments (00:00 to 23:59) fall in this category but morning segments (06:00am to 11:59am) or periodic 30-minute segments don\u2019t. Example Simplified raw bluetooth data The following is a simplified example with bluetooth data from three days and two time segments: morning and afternoon. There are two own devices: 55C836F5-487E-405F-8E28-21DBD40FA4FF detected seven times across two days and 499A1EAF-DDF1-4657-986C-EA5032104448 detected eight times on a single day. 
local_date segment bt_address own_device 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-29 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 afternoon 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-30 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2017-05-07 morning 5C5A9C41-2F68-4CEB-96D0-77DE3729B729 0 2017-05-07 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2017-05-07 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 morning 6C444841-FE64-4375-BC3F-FA410CDC0AC7 0 2017-05-07 morning 4DC7A22D-9F1F-4DEF-8576-086910AABCB5 0 2017-05-07 afternoon 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 The most and least frequent OTHER devices ( own_device == 0 ) during morning segments The most and least frequent ALL | OWN | OTHER devices are computed within each time segment instance, across time segment instances of the same type and across the entire dataset of each person. 
These are the most and least frequent devices for OTHER devices during morning segments. most frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) least frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) most frequent device across 2016-11-30 morning: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent device across 2016-11-30 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) least frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent across morning segments: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent across morning segments: '6C444841-FE64-4375-BC3F-FA410CDC0AC7' (when tied, the first occurrence is chosen) most frequent across dataset: '499A1EAF-DDF1-4657-986C-EA5032104448' (only taking into account \"morning\" segments) least frequent across dataset: '4DC7A22D-9F1F-4DEF-8576-086910AABCB5' (when tied, the first occurrence is chosen) Bluetooth features for OTHER devices and morning segments For brevity we only show the following features for morning segments: OTHER : DEVICES : [ \"countscans\" , \"uniquedevices\" , \"meanscans\" , \"stdscans\" ] SCANS_MOST_FREQUENT_DEVICE : [ \"withinsegments\" , \"acrosssegments\" , \"acrossdataset\" ] Note that countscansmostfrequentdeviceacrossdatasetothers is all 0 s because 499A1EAF-DDF1-4657-986C-EA5032104448 is excluded from the count as it is labelled as an own device (not other ). 
local_segment countscansothers uniquedevicesothers meanscansothers stdscansothers countscansmostfrequentdevicewithinsegmentsothers countscansmostfrequentdeviceacrosssegmentsothers countscansmostfrequentdeviceacrossdatasetothers 2016-11-29-morning 1 1 1.000000 NaN 1 0.0 0.0 2016-11-30-morning 4 3 1.333333 0.57735 2 2.0 2.0 2017-05-07-morning 5 5 1.000000 0.00000 1 1.0 1.0","title":"DORYAB provider"},{"location":"features/phone-calls/","text":"Phone Calls \u00b6 Sensor parameters description for [PHONE_CALLS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the calls data is stored RAPIDS Provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_calls_raw.csv - data/raw/ { pid } /phone_calls_with_datetime.csv - data/interim/ { pid } /phone_calls_features/phone_calls_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_calls.csv Parameters description for [PHONE_CALLS][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CALLS features from the RAPIDS provider [CALL_TYPES] The particular call_type that will be analyzed. The options for this parameter are incoming, outgoing or missed. [FEATURES] Features to be computed for outgoing , incoming , and missed calls. Note that the same features are available for both incoming and outgoing calls, while missed calls has its own set of features. See the tables below. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] incoming and outgoing calls: Feature Units Description count calls Number of calls of a particular call_type occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular call_type for a particular time_segment meanduration seconds The mean duration of all calls of a particular call_type during a particular time_segment . 
sumduration seconds The sum of the duration of all calls of a particular call_type during a particular time_segment . minduration seconds The duration of the shortest call of a particular call_type during a particular time_segment . maxduration seconds The duration of the longest call of a particular call_type during a particular time_segment . stdduration seconds The standard deviation of the duration of all the calls of a particular call_type during a particular time_segment . modeduration seconds The mode of the duration of all the calls of a particular call_type during a particular time_segment . entropyduration nats The estimate of the Shannon entropy for the duration of all the calls of a particular call_type during a particular time_segment . timefirstcall minutes The time in minutes between 12:00am (midnight) and the first call of call_type . timelastcall minutes The time in minutes between 12:00am (midnight) and the last call of call_type . countmostfrequentcontact calls The number of calls of a particular call_type during a particular time_segment of the most frequent contact throughout the monitored period. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] missed calls: Feature Units Description count calls Number of missed calls that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with missed calls for a particular time_segment timefirstcall minutes The time in minutes from 12:00am (Midnight) that the first missed call occurred. timelastcall minutes The time in minutes from 12:00am (Midnight) that the last missed call occurred. countmostfrequentcontact calls The number of missed calls during a particular time_segment of the most frequent contact throughout the monitored period. 
Assumptions/Observations Traces for iOS calls are unique even for the same contact calling a participant more than once which renders countmostfrequentcontact meaningless and distinctcontacts equal to the total number of traces. [CALL_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [CALL_TYPES] outgoing matches the [FEATURES] key outgoing iOS calls data is transformed to match Android calls data format. See our algorithm","title":"Phone Calls"},{"location":"features/phone-calls/#phone-calls","text":"Sensor parameters description for [PHONE_CALLS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the calls data is stored","title":"Phone Calls"},{"location":"features/phone-calls/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_calls_raw.csv - data/raw/ { pid } /phone_calls_with_datetime.csv - data/interim/ { pid } /phone_calls_features/phone_calls_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_calls.csv Parameters description for [PHONE_CALLS][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CALLS features from the RAPIDS provider [CALL_TYPES] The particular call_type that will be analyzed. The options for this parameter are incoming, outgoing or missed. [FEATURES] Features to be computed for outgoing , incoming , and missed calls. Note that the same features are available for both incoming and outgoing calls, while missed calls has its own set of features. See the tables below. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] incoming and outgoing calls: Feature Units Description count calls Number of calls of a particular call_type occurred during a particular time_segment . 
distinctcontacts contacts Number of distinct contacts that are associated with a particular call_type for a particular time_segment meanduration seconds The mean duration of all calls of a particular call_type during a particular time_segment . sumduration seconds The sum of the duration of all calls of a particular call_type during a particular time_segment . minduration seconds The duration of the shortest call of a particular call_type during a particular time_segment . maxduration seconds The duration of the longest call of a particular call_type during a particular time_segment . stdduration seconds The standard deviation of the duration of all the calls of a particular call_type during a particular time_segment . modeduration seconds The mode of the duration of all the calls of a particular call_type during a particular time_segment . entropyduration nats The estimate of the Shannon entropy for the duration of all the calls of a particular call_type during a particular time_segment . timefirstcall minutes The time in minutes between 12:00am (midnight) and the first call of call_type . timelastcall minutes The time in minutes between 12:00am (midnight) and the last call of call_type . countmostfrequentcontact calls The number of calls of a particular call_type during a particular time_segment of the most frequent contact throughout the monitored period. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] missed calls: Feature Units Description count calls Number of missed calls that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with missed calls for a particular time_segment timefirstcall minutes The time in minutes from 12:00am (Midnight) that the first missed call occurred. timelastcall minutes The time in minutes from 12:00am (Midnight) that the last missed call occurred. 
countmostfrequentcontact calls The number of missed calls during a particular time_segment of the most frequent contact throughout the monitored period. Assumptions/Observations Traces for iOS calls are unique even for the same contact calling a participant more than once which renders countmostfrequentcontact meaningless and distinctcontacts equal to the total number of traces. [CALL_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [CALL_TYPES] outgoing matches the [FEATURES] key outgoing iOS calls data is transformed to match Android calls data format. See our algorithm","title":"RAPIDS Provider"},{"location":"features/phone-conversation/","text":"Phone Conversation \u00b6 Sensor parameters description for [PHONE_CONVERSATION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the conversation data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where the conversation data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_conversation_raw.csv - data/raw/ { pid } /phone_conversation_with_datetime.csv - data/interim/ { pid } /phone_conversation_features/phone_conversation_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_conversation.csv Parameters description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CONVERSATION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [RECORDING_MINUTES] Minutes the plugin was recording audio (default 1 min) [PAUSED_MINUTES] Minutes the plugin was NOT recording audio (default 3 min) Features description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Feature Units Description minutessilence minutes Minutes labeled as silence minutesnoise minutes Minutes labeled as noise minutesvoice minutes Minutes labeled as voice minutesunknown minutes Minutes labeled as unknown sumconversationduration minutes Total duration of all conversations maxconversationduration minutes Longest duration of all conversations minconversationduration minutes Shortest duration of all conversations avgconversationduration minutes Average duration of all conversations sdconversationduration minutes Standard Deviation of the duration of all conversations timefirstconversation minutes Minutes since midnight when the first conversation for a time segment was detected timelastconversation minutes Minutes since midnight when the last conversation for a time segment was detected noisesumenergy L2-norm Sum of all energy values when inference is noise noiseavgenergy L2-norm Average of all energy values when inference is noise noisesdenergy L2-norm Standard Deviation of all energy values 
when inference is noise noiseminenergy L2-norm Minimum of all energy values when inference is noise noisemaxenergy L2-norm Maximum of all energy values when inference is noise voicesumenergy L2-norm Sum of all energy values when inference is voice voiceavgenergy L2-norm Average of all energy values when inference is voice voicesdenergy L2-norm Standard Deviation of all energy values when inference is voice voiceminenergy L2-norm Minimum of all energy values when inference is voice voicemaxenergy L2-norm Maximum of all energy values when inference is voice silencesensedfraction - Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) noisesensedfraction - Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) voicesensedfraction - Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) unknownsensedfraction - Ratio between minutesunknown and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) silenceexpectedfraction - Ratio between minutessilence and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) noiseexpectedfraction - Ratio between minutesnoise and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) voiceexpectedfraction - Ratio between minutesvoice and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) unknownexpectedfraction - Ratio between minutesunknown and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) Assumptions/Observations The timestamp of conversation rows in iOS is in seconds so we 
convert it to milliseconds to match Android\u2019s format","title":"Phone Conversation"},{"location":"features/phone-conversation/#phone-conversation","text":"Sensor parameters description for [PHONE_CONVERSATION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the conversation data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where the conversation data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS)","title":"Phone Conversation"},{"location":"features/phone-conversation/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_conversation_raw.csv - data/raw/ { pid } /phone_conversation_with_datetime.csv - data/interim/ { pid } /phone_conversation_features/phone_conversation_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_conversation.csv Parameters description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CONVERSATION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [RECORDING_MINUTES] Minutes the plugin was recording audio (default 1 min) [PAUSED_MINUTES] Minutes the plugin was NOT recording audio (default 3 min) Features description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Feature Units Description minutessilence minutes Minutes labeled as silence minutesnoise minutes Minutes labeled as noise minutesvoice minutes Minutes labeled as voice minutesunknown minutes Minutes labeled as unknown sumconversationduration minutes Total duration of all conversations maxconversationduration minutes Longest duration of all conversations minconversationduration minutes Shortest duration of all conversations 
avgconversationduration minutes Average duration of all conversations sdconversationduration minutes Standard Deviation of the duration of all conversations timefirstconversation minutes Minutes since midnight when the first conversation for a time segment was detected timelastconversation minutes Minutes since midnight when the last conversation for a time segment was detected noisesumenergy L2-norm Sum of all energy values when inference is noise noiseavgenergy L2-norm Average of all energy values when inference is noise noisesdenergy L2-norm Standard Deviation of all energy values when inference is noise noiseminenergy L2-norm Minimum of all energy values when inference is noise noisemaxenergy L2-norm Maximum of all energy values when inference is noise voicesumenergy L2-norm Sum of all energy values when inference is voice voiceavgenergy L2-norm Average of all energy values when inference is voice voicesdenergy L2-norm Standard Deviation of all energy values when inference is voice voiceminenergy L2-norm Minimum of all energy values when inference is voice voicemaxenergy L2-norm Maximum of all energy values when inference is voice silencesensedfraction - Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) noisesensedfraction - Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) voicesensedfraction - Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) unknownsensedfraction - Ratio between minutesunknown and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) silenceexpectedfraction - Ratio between minutessilence and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) noiseexpectedfraction - Ratio between minutesnoise and the number of minutes that in theory should have been sensed based on 
the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) voiceexpectedfraction - Ratio between minutesvoice and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) unknownexpectedfraction - Ratio between minutesunknown and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) Assumptions/Observations The timestamp of conversation rows in iOS is in seconds so we convert it to milliseconds to match Android\u2019s format","title":"RAPIDS provider"},{"location":"features/phone-data-yield/","text":"Phone Data Yield \u00b6 This is a combinatorial sensor which means that we use the data from multiple sensors to extract data yield features. Data yield features can be used to remove rows ( time segments ) that do not contain enough data. You should decide what is your \u201cenough\u201d threshold depending on the type of sensors you collected (frequency vs event based, e.g. accelerometer vs calls), the length of your study, and the rates of missing data that your analysis could handle. Why is data yield important? Imagine that you want to extract PHONE_CALLS features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the phone logged 10 calls and 23 hours of data from other sensors and on day 2 the phone logged 10 calls and only 2 hours of data from other sensors. It\u2019s more likely that other calls were placed on the 22 hours of data that you didn\u2019t log on day 2 than on the 1 hour of data you didn\u2019t log on day 1, and so including day 2 in your analysis could bias your results. Sensor parameters description for [PHONE_DATA_YIELD] : Key Description [SENSORS] One or more phone sensor config keys (e.g. PHONE_MESSAGES ). The more keys you include the more accurately RAPIDS can approximate the time a smartphone was sensing data. 
The supported phone sensors you can include in this list are outlined below ( do NOT include Fitbit sensors, ONLY include phone sensors ). Supported phone sensors for [PHONE_DATA_YIELD][SENSORS] PHONE_ACCELEROMETER PHONE_ACTIVITY_RECOGNITION PHONE_APPLICATIONS_CRASHES PHONE_APPLICATIONS_FOREGROUND PHONE_APPLICATIONS_NOTIFICATIONS PHONE_BATTERY PHONE_BLUETOOTH PHONE_CALLS PHONE_CONVERSATION PHONE_KEYBOARD PHONE_LIGHT PHONE_LOCATIONS PHONE_LOG PHONE_MESSAGES PHONE_SCREEN PHONE_WIFI_CONNECTED PHONE_WIFI_VISIBLE RAPIDS provider \u00b6 Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when any phone sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The timestamps of all sensors are concatenated and then grouped per time segment. Minute and hour windows are created from the beginning of each time segment instance and these windows are marked as valid based on the definitions above. The duration of each time segment is taken into account to compute the features described below. 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } / { sensor } _raw.csv # one for every [PHONE_DATA_YIELD][SENSORS] - data/interim/ { pid } /phone_yielded_timestamps.csv - data/interim/ { pid } /phone_yielded_timestamps_with_datetime.csv - data/interim/ { pid } /phone_data_yield_features/phone_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_data_yield.csv Parameters description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. Features description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: the 12 missing hours are from the beginning of the segment or 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). 
However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a .","title":"Phone Data Yield"},{"location":"features/phone-data-yield/#phone-data-yield","text":"This is a combinatorial sensor which means that we use the data from multiple sensors to extract data yield features. Data yield features can be used to remove rows ( time segments ) that do not contain enough data. You should decide what is your \u201cenough\u201d threshold depending on the type of sensors you collected (frequency vs event based, e.g. accelerometer vs calls), the length of your study, and the rates of missing data that your analysis could handle. Why is data yield important? Imagine that you want to extract PHONE_CALLS features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the phone logged 10 calls and 23 hours of data from other sensors and on day 2 the phone logged 10 calls and only 2 hours of data from other sensors. It\u2019s more likely that other calls were placed on the 22 hours of data that you didn\u2019t log on day 2 than on the 1 hour of data you didn\u2019t log on day 1, and so including day 2 in your analysis could bias your results. Sensor parameters description for [PHONE_DATA_YIELD] : Key Description [SENSORS] One or more phone sensor config keys (e.g. PHONE_MESSAGES ). The more keys you include the more accurately RAPIDS can approximate the time a smartphone was sensing data. The supported phone sensors you can include in this list are outlined below ( do NOT include Fitbit sensors, ONLY include phone sensors ). 
Supported phone sensors for [PHONE_DATA_YIELD][SENSORS] PHONE_ACCELEROMETER PHONE_ACTIVITY_RECOGNITION PHONE_APPLICATIONS_CRASHES PHONE_APPLICATIONS_FOREGROUND PHONE_APPLICATIONS_NOTIFICATIONS PHONE_BATTERY PHONE_BLUETOOTH PHONE_CALLS PHONE_CONVERSATION PHONE_KEYBOARD PHONE_LIGHT PHONE_LOCATIONS PHONE_LOG PHONE_MESSAGES PHONE_SCREEN PHONE_WIFI_CONNECTED PHONE_WIFI_VISIBLE","title":"Phone Data Yield"},{"location":"features/phone-data-yield/#rapids-provider","text":"Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when any phone sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The timestamps of all sensors are concatenated and then grouped per time segment. Minute and hour windows are created from the beginning of each time segment instance and these windows are marked as valid based on the definitions above. The duration of each time segment is taken into account to compute the features described below. Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } / { sensor } _raw.csv # one for every [PHONE_DATA_YIELD][SENSORS] - data/interim/ { pid } /phone_yielded_timestamps.csv - data/interim/ { pid } /phone_yielded_timestamps_with_datetime.csv - data/interim/ { pid } /phone_data_yield_features/phone_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_data_yield.csv Parameters description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. 
Features description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: (a) the 12 missing hours are from the beginning of the segment or (b) 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a .","title":"RAPIDS provider"},{"location":"features/phone-keyboard/","text":"Phone Keyboard \u00b6 Sensor parameters description for [PHONE_KEYBOARD] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the keyboard data is stored Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_KEYBOARD ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Keyboard"},{"location":"features/phone-keyboard/#phone-keyboard","text":"Sensor parameters description for [PHONE_KEYBOARD] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the keyboard data is stored Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_KEYBOARD ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Keyboard"},{"location":"features/phone-light/","text":"Phone Light \u00b6 Sensor parameters description for [PHONE_LIGHT] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the light data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_light_raw.csv - data/raw/ { pid } /phone_light_with_datetime.csv - data/interim/ { pid } /phone_light_features/phone_light_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_light.csv Parameters description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_LIGHT features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of light sensor rows recorded. maxlux lux The maximum ambient luminance. minlux lux The minimum ambient luminance. avglux lux The average ambient luminance. medianlux lux The median ambient luminance. stdlux lux The standard deviation of ambient luminance. 
Assumptions/Observations NA","title":"Phone Light"},{"location":"features/phone-light/#phone-light","text":"Sensor parameters description for [PHONE_LIGHT] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the light data is stored","title":"Phone Light"},{"location":"features/phone-light/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_light_raw.csv - data/raw/ { pid } /phone_light_with_datetime.csv - data/interim/ { pid } /phone_light_features/phone_light_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_light.csv Parameters description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_LIGHT features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of light sensor rows recorded. maxlux lux The maximum ambient luminance. minlux lux The minimum ambient luminance. avglux lux The average ambient luminance. medianlux lux The median ambient luminance. stdlux lux The standard deviation of ambient luminance. Assumptions/Observations NA","title":"RAPIDS provider"},{"location":"features/phone-locations/","text":"Phone Locations \u00b6 Sensor parameters description for [PHONE_LOCATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the location data is stored [LOCATIONS_TO_USE] Type of location data to use, one of ALL , GPS , ALL_RESAMPLED or FUSED_RESAMPLED . This filter is based on the provider column of the locations table, ALL includes every row, GPS only includes rows where the provider is gps, ALL_RESAMPLED includes all rows after being resampled, and FUSED_RESAMPLED only includes rows where the provider is fused after being resampled. 
[FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled to the next valid timestamp (see the Assumptions/Observations below) only if the time difference between them is less than or equal to this threshold (in minutes). [FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled at most for this long (in minutes) Assumptions/Observations Types of location data to use Android and iOS clients can collect location coordinates through the phone\u2019s GPS, the network cellular towers around the phone, or Google\u2019s fused location API. If you want to use only the GPS provider, set [LOCATIONS_TO_USE] to GPS If you want to use all providers, set [LOCATIONS_TO_USE] to ALL If you collected location data from different providers, including the fused API, use ALL_RESAMPLED If your mobile client was configured to use fused location only or want to focus only on this provider, set [LOCATIONS_TO_USE] to FUSED_RESAMPLED . ALL_RESAMPLED and FUSED_RESAMPLED take the original location coordinates and replicate each pair forward in time as long as the phone was sensing data as indicated by the joined timestamps of [PHONE_DATA_YIELD][SENSORS] . This is done because Google\u2019s API only logs a new location coordinate pair when it is sufficiently different in time or space from the previous one and because GPS and network providers can log data at variable rates. There are two parameters associated with resampling fused location. FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD (in minutes, default 30) controls the maximum gap between any two coordinate pairs to replicate the last known pair. 
For example, participant A\u2019s phone did not collect data between 10:30 am and 10:50 am and between 11:05 am and 11:40 am, the last known coordinate pair is replicated during the first period but not the second. In other words, we assume that we can no longer guarantee the participant stayed at the last known location if the phone did not sense data for more than 30 minutes. FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION (in minutes, default 720 or 12 hours) stops the last known fused location from being replicated longer than this threshold even if the phone was sensing data continuously. For example, participant A went home at 9 pm, and their phone was sensing data without gaps until 11 am the next morning, the last known location is replicated until 9 am. If you have suggestions to modify or improve this resampling, let us know. BARNETT provider \u00b6 These features are based on the original open-source implementation by Barnett et al and some features created by Canzian et al . Available time segments and platforms Available only for segments that start at 00:00:00 and end at 23:59:59 of the same or a different day (daily, weekly, weekend, etc.) Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the BARNETT provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this are dropped. 
This number means there\u2019s a 68% probability the actual location is within this radius [IF_MULTIPLE_TIMEZONES] Currently, USE_MOST_COMMON is the only value supported. If the location data for a participant belongs to multiple time zones, we select the most common because Barnett\u2019s algorithm can only handle one time zone [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. Features description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] adapted from Beiwe Summary Statistics : Feature Units Description hometime minutes Time at home. Time spent at home in minutes. Home is the most visited significant location between 8 pm and 8 am, including any pauses within a 200-meter radius. disttravelled meters Total distance traveled over a day (flights). rog meters The Radius of Gyration (rog) is a measure in meters of the area covered by a person over a day. A centroid is calculated for all the places (pauses) visited during a day, and a weighted distance between all the places and that centroid is computed. The weights are proportional to the time spent in each place. maxdiam meters The maximum diameter is the largest distance between any two pauses. maxhomedist meters The maximum distance from home in meters. siglocsvisited locations The number of significant locations visited during the day. Significant locations are computed using k-means clustering over pauses found in the whole monitoring period. The number of clusters is found iterating k from 1 to 200 stopping until the centroids of two significant locations are within 400 meters of one another. avgflightlen meters Mean length of all flights. 
stdflightlen meters Standard deviation of the length of all flights. avgflightdur seconds Mean duration of all flights. stdflightdur seconds The standard deviation of the duration of all flights. probpause - The fraction of a day spent in a pause (as opposed to a flight) siglocentropy nats Shannon\u2019s entropy measurement is based on the proportion of time spent at each significant location visited during a day. circdnrtn - A continuous metric quantifying a person\u2019s circadian routine that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day. wkenddayrtn - Same as circdnrtn but computed separately for weekends and weekdays. Assumptions/Observations Multi day segment features Barnett\u2019s features are only available on time segments that span entire days (00:00:00 to 23:59:59). Such segments can be one-day long (daily) or multi-day (weekly, for example). Multi-day segment features are computed based on daily features summarized the following way: sum for hometime , disttravelled , siglocsvisited , and minutes_data_used max for maxdiam , and maxhomedist mean for rog , avgflightlen , stdflightlen , avgflightdur , stdflightdur , probpause , siglocentropy , circdnrtn , wkenddayrtn , and minsmissing Computation speed The process to extract these features can be slow compared to other sensors and providers due to the required simulation. How are these features computed? These features are based on a Pause-Flight model. A pause is defined as a mobility trace (location pings) within a certain duration and distance (by default, 300 seconds and 60 meters). A flight is any mobility trace between two pauses. Data is resampled and imputed before the features are computed. See Barnett et al for more information. In RAPIDS, we only expose one parameter for these features (accuracy limit). 
You can change other parameters in src/features/phone_locations/barnett/library/MobilityFeatures.R . Significant Locations Significant locations are determined using K-means clustering on pauses longer than 10 minutes. The number of clusters (K) is increased until no two clusters are within 400 meters from each other. After this, pauses within a certain range of a cluster (200 meters by default) count as a visit to that significant location. This description was adapted from the Supplementary Materials of Barnett et al . The Circadian Calculation For a detailed description of how this is calculated, see Canzian et al . DORYAB provider \u00b6 These features are based on the original implementation by Doryab et al. . Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_processed_with_datetime_with_home.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the DORYAB provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this will be dropped. This number means there\u2019s a 68% probability the true location is within this radius [DBSCAN_EPS] The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. 
[DBSCAN_MINSAMPLES] The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself. [THRESHOLD_STATIC] It is the threshold value in km/hr which labels a row as Static or Moving. [MAXIMUM_ROW_GAP] The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. [MAXIMUM_ROW_DURATION] The time difference between any two consecutive rows A and B is considered as the time a participant spent in A . If this difference is bigger than MAXIMUM_ROW_GAP we substitute it with MAXIMUM_ROW_DURATION . [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. [SAMPLING_FREQUENCY] Expected time difference between any two location rows in minutes. If set to 0 , the sampling frequency will be inferred automatically as the median of all the differences between two consecutive row timestamps (recommended if you are using FUSED_RESAMPLED data). This parameter impacts all the time calculations. [CLUSTER_ON] Set this flag to PARTICIPANT_DATASET to create clusters based on the entire participant\u2019s dataset or to TIME_SEGMENT to create clusters based on all the instances of the corresponding time segment (e.g. all mornings). [CLUSTERING_ALGORITHM] The original Doryab et al. implementation uses DBSCAN , OPTICS is also available with similar (but not identical) clustering results and lower memory consumption. 
[RADIUS_FOR_HOME] All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see timeathome feature). Features description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Feature Units Description locationvariance \\(meters^2\\) The sum of the variances of the latitude and longitude columns. loglocationvariance - Log of the sum of the variances of the latitude and longitude columns. totaldistance meters Total distance traveled in a time segment using the haversine formula. averagespeed km/hr Average speed in a time segment considering only the instances labeled as Moving. varspeed km/hr Speed variance in a time segment considering only the instances labeled as Moving. circadianmovement - Not suggested for use now; see Observations below. \u201cIt encodes the extent to which a person\u2019s location patterns follow a 24-hour circadian cycle.\" Doryab et al. . numberofsignificantplaces places Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place. numberlocationtransitions transitions Number of movements between any two clusters in a time segment. radiusgyration meters Quantifies the area covered by a participant timeattop1location minutes Time spent at the most significant location. timeattop2location minutes Time spent at the 2 nd most significant location. timeattop3location minutes Time spent at the 3 rd most significant location. movingtostaticratio - Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine. These times are computed using timeInSeconds feature. 
outlierstimepercent - Ratio of the time spent in non-significant clusters to the time spent in all clusters (total location sensed time). A higher value represents more time spent in non-significant clusters. These times are computed using timeInSeconds feature. maxlengthstayatclusters minutes Maximum time spent in a cluster (significant location). minlengthstayatclusters minutes Minimum time spent in a cluster (significant location). meanlengthstayatclusters minutes Average time spent in a cluster (significant location). stdlengthstayatclusters minutes Standard deviation of time spent in a cluster (significant location). locationentropy nats Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). normalizedlocationentropy nats Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). timeathome minutes Time spent at home (see Observations below for a description of how we compute home). Assumptions/Observations Significant Locations Identified Significant locations are determined using DBSCAN clustering on locations that a participant visits over the course of the period of data collection. Circadian Movement Calculation Note Feb 3 2021. It seems the implementation of this feature is not correct; we suggest not to use this feature until a fix is in place. For a detailed description of how this should be calculated, see Saeb et al . Fine-Tuning Clustering Parameters Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that EPS/MAX_EPS =100 produced closer clustering results to reality. 
Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set EPS based on your location data\u2019s accuracy (the more accurate your data is, the lower you should be able to set EPS). Duration Calculation To calculate the time duration component for our features, we compute the difference between consecutive rows\u2019 timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with a maximum duration (60 seconds by default, i.e., we assume a participant spent at least 60 seconds in their last known location) Home location Home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm ( DB_SCAN or OPTICS ) and considering the center of the biggest cluster home for that participant.","title":"Phone Locations"},{"location":"features/phone-locations/#phone-locations","text":"Sensor parameters description for [PHONE_LOCATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the location data is stored [LOCATIONS_TO_USE] Type of location data to use, one of ALL , GPS , ALL_RESAMPLED or FUSED_RESAMPLED . This filter is based on the provider column of the locations table, ALL includes every row, GPS only includes rows where the provider is gps, ALL_RESAMPLED includes all rows after being resampled, and FUSED_RESAMPLED only includes rows where the provider is fused after being resampled. [FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled to the next valid timestamp (see the Assumptions/Observations below) only if the time difference between them is less or equal than this threshold (in minutes). 
[FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled at most for this long (in minutes) Assumptions/Observations Types of location data to use Android and iOS clients can collect location coordinates through the phone\u2019s GPS, the network cellular towers around the phone, or Google\u2019s fused location API. If you want to use only the GPS provider, set [LOCATIONS_TO_USE] to GPS If you want to use all providers, set [LOCATIONS_TO_USE] to ALL If you collected location data from different providers, including the fused API, use ALL_RESAMPLED If your mobile client was configured to use fused location only or want to focus only on this provider, set [LOCATIONS_TO_USE] to FUSED_RESAMPLED . ALL_RESAMPLED and FUSED_RESAMPLED take the original location coordinates and replicate each pair forward in time as long as the phone was sensing data as indicated by the joined timestamps of [PHONE_DATA_YIELD][SENSORS] . This is done because Google\u2019s API only logs a new location coordinate pair when it is sufficiently different in time or space from the previous one and because GPS and network providers can log data at variable rates. There are two parameters associated with resampling fused location. FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD (in minutes, default 30) controls the maximum gap between any two coordinate pairs to replicate the last known pair. For example, participant A\u2019s phone did not collect data between 10:30 am and 10:50 am and between 11:05 am and 11:40 am, the last known coordinate pair is replicated during the first period but not the second. In other words, we assume that we can no longer guarantee the participant stayed at the last known location if the phone did not sense data for more than 30 minutes. 
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION (in minutes, default 720 or 12 hours) stops the last known fused location from being replicated longer than this threshold even if the phone was sensing data continuously. For example, participant A went home at 9 pm, and their phone was sensing data without gaps until 11 am the next morning, the last known location is replicated until 9 am. If you have suggestions to modify or improve this resampling, let us know.","title":"Phone Locations"},{"location":"features/phone-locations/#barnett-provider","text":"These features are based on the original open-source implementation by Barnett et al and some features created by Canzian et al . Available time segments and platforms Available only for segments that start at 00:00:00 and end at 23:59:59 of the same or a different day (daily, weekly, weekend, etc.) Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the BARNETT provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this is dropped. This number means there\u2019s a 68% probability the actual location is within this radius [IF_MULTIPLE_TIMEZONES] Currently, USE_MOST_COMMON is the only value supported. 
If the location data for a participant belongs to multiple time zones, we select the most common because Barnett\u2019s algorithm can only handle one time zone [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. Features description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] adapted from Beiwe Summary Statistics : Feature Units Description hometime minutes Time at home. Time spent at home in minutes. Home is the most visited significant location between 8 pm and 8 am, including any pauses within a 200-meter radius. disttravelled meters Total distance traveled over a day (flights). rog meters The Radius of Gyration (rog) is a measure in meters of the area covered by a person over a day. A centroid is calculated for all the places (pauses) visited during a day, and a weighted distance between all the places and that centroid is computed. The weights are proportional to the time spent in each place. maxdiam meters The maximum diameter is the largest distance between any two pauses. maxhomedist meters The maximum distance from home in meters. siglocsvisited locations The number of significant locations visited during the day. Significant locations are computed using k-means clustering over pauses found in the whole monitoring period. The number of clusters is found iterating k from 1 to 200 stopping until the centroids of two significant locations are within 400 meters of one another. avgflightlen meters Mean length of all flights. stdflightlen meters Standard deviation of the length of all flights. avgflightdur seconds Mean duration of all flights. 
stdflightdur seconds The standard deviation of the duration of all flights. probpause - The fraction of a day spent in a pause (as opposed to a flight) siglocentropy nats Shannon\u2019s entropy measurement is based on the proportion of time spent at each significant location visited during a day. circdnrtn - A continuous metric quantifying a person\u2019s circadian routine that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day. wkenddayrtn - Same as circdnrtn but computed separately for weekends and weekdays. Assumptions/Observations Multi day segment features Barnett\u2019s features are only available on time segments that span entire days (00:00:00 to 23:59:59). Such segments can be one-day long (daily) or multi-day (weekly, for example). Multi-day segment features are computed based on daily features summarized the following way: sum for hometime , disttravelled , siglocsvisited , and minutes_data_used max for maxdiam , and maxhomedist mean for rog , avgflightlen , stdflightlen , avgflightdur , stdflightdur , probpause , siglocentropy , circdnrtn , wkenddayrtn , and minsmissing Computation speed The process to extract these features can be slow compared to other sensors and providers due to the required simulation. How are these features computed? These features are based on a Pause-Flight model. A pause is defined as a mobility trace (location pings) within a certain duration and distance (by default, 300 seconds and 60 meters). A flight is any mobility trace between two pauses. Data is resampled and imputed before the features are computed. See Barnett et al for more information. In RAPIDS, we only expose one parameter for these features (accuracy limit). You can change other parameters in src/features/phone_locations/barnett/library/MobilityFeatures.R . 
Significant Locations Significant locations are determined using K-means clustering on pauses longer than 10 minutes. The number of clusters (K) is increased until no two clusters are within 400 meters from each other. After this, pauses within a certain range of a cluster (200 meters by default) count as a visit to that significant location. This description was adapted from the Supplementary Materials of Barnett et al . The Circadian Calculation For a detailed description of how this is calculated, see Canzian et al .","title":"BARNETT provider"},{"location":"features/phone-locations/#doryab-provider","text":"These features are based on the original implementation by Doryab et al. . Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_processed_with_datetime_with_home.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the DORYAB provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this will be dropped. This number means there\u2019s a 68% probability the true location is within this radius [DBSCAN_EPS] The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. 
[DBSCAN_MINSAMPLES] The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself. [THRESHOLD_STATIC] It is the threshold value in km/hr which labels a row as Static or Moving. [MAXIMUM_ROW_GAP] The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. [MAXIMUM_ROW_DURATION] The time difference between any two consecutive rows A and B is considered as the time a participant spent in A . If this difference is bigger than MAXIMUM_ROW_GAP we substitute it with MAXIMUM_ROW_DURATION . [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. [SAMPLING_FREQUENCY] Expected time difference between any two location rows in minutes. If set to 0 , the sampling frequency will be inferred automatically as the median of all the differences between two consecutive row timestamps (recommended if you are using FUSED_RESAMPLED data). This parameter impacts all the time calculations. [CLUSTER_ON] Set this flag to PARTICIPANT_DATASET to create clusters based on the entire participant\u2019s dataset or to TIME_SEGMENT to create clusters based on all the instances of the corresponding time segment (e.g. all mornings). [CLUSTERING_ALGORITHM] The original Doryab et al. implementation uses DBSCAN , OPTICS is also available with similar (but not identical) clustering results and lower memory consumption. 
[RADIUS_FOR_HOME] All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see timeathome feature). Features description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Feature Units Description locationvariance \\(meters^2\\) The sum of the variances of the latitude and longitude columns. loglocationvariance - Log of the sum of the variances of the latitude and longitude columns. totaldistance meters Total distance traveled in a time segment using the haversine formula. averagespeed km/hr Average speed in a time segment considering only the instances labeled as Moving. varspeed km/hr Speed variance in a time segment considering only the instances labeled as Moving. circadianmovement - Not suggested for use now; see Observations below. \u201cIt encodes the extent to which a person\u2019s location patterns follow a 24-hour circadian cycle.\" Doryab et al. . numberofsignificantplaces places Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place. numberlocationtransitions transitions Number of movements between any two clusters in a time segment. radiusgyration meters Quantifies the area covered by a participant timeattop1location minutes Time spent at the most significant location. timeattop2location minutes Time spent at the 2 nd most significant location. timeattop3location minutes Time spent at the 3 rd most significant location. movingtostaticratio - Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine. These times are computed using timeInSeconds feature. 
outlierstimepercent - Ratio between the time spent in non-significant clusters divided by the time spent in all clusters (total location sensed time). A higher value represents more time spent in non-significant clusters. These times are computed using timeInSeconds feature. maxlengthstayatclusters minutes Maximum time spent in a cluster (significant location). minlengthstayatclusters minutes Minimum time spent in a cluster (significant location). meanlengthstayatclusters minutes Average time spent in a cluster (significant location). stdlengthstayatclusters minutes Standard deviation of time spent in a cluster (significant location). locationentropy nats Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). normalizedlocationentropy nats Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). timeathome minutes Time spent at home (see Observations below for a description on how we compute home). Assumptions/Observations Significant Locations Identified Significant locations are determined using DBSCAN clustering on locations that a participant visits over the course of the period of data collection. Circadian Movement Calculation Note Feb 3 2021. It seems the implementation of this feature is not correct; we suggest not to use this feature until a fix is in place. For a detailed description of how this should be calculated, see Saeb et al . Fine-Tuning Clustering Parameters Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that EPS/MAX_EPS =100 produced closer clustering results to reality. 
Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set EPS based on your location data\u2019s accuracy (the more accurate your data is, the lower you should be able to set EPS). Duration Calculation To calculate the time duration component for our features, we compute the difference between consecutive rows\u2019 timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with a maximum duration (60 seconds by default, i.e., we assume a participant spent at least 60 seconds in their last known location) Home location Home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm ( DB_SCAN or OPTICS ) and considering the center of the biggest cluster home for that participant.","title":"DORYAB provider"},{"location":"features/phone-log/","text":"Phone Log \u00b6 Sensor parameters description for [PHONE_LOG] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where a data log is stored for Android devices [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where a data log is stored for iOS devices Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_LOG ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Log"},{"location":"features/phone-log/#phone-log","text":"Sensor parameters description for [PHONE_LOG] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where a data log is stored for Android devices [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where a data log is stored for iOS devices Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_LOG ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Log"},{"location":"features/phone-messages/","text":"Phone Messages \u00b6 Sensor parameters description for [PHONE_MESSAGES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the messages data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_messages_raw.csv - data/raw/ { pid } /phone_messages_with_datetime.csv - data/interim/ { pid } /phone_messages_features/phone_messages_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_messages.csv Parameters description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_MESSAGES features from the RAPIDS provider [MESSAGES_TYPES] The messages_type that will be analyzed. The options for this parameter are received or sent . [FEATURES] Features to be computed, see table below for [MESSAGES_TYPES] received and sent Features description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Feature Units Description count messages Number of messages of type messages_type that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular messages_type during a particular time_segment . timefirstmessages minutes Number of minutes between 12:00am (midnight) and the first message of a particular messages_type during a particular time_segment . timelastmessages minutes Number of minutes between 12:00am (midnight) and the last message of a particular messages_type during a particular time_segment . 
countmostfrequentcontact messages Number of messages from the contact with the most messages of messages_type during a time_segment throughout the whole dataset of each participant. Assumptions/Observations [MESSAGES_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [MESSAGES_TYPES] sent matches the [FEATURES] key sent","title":"Phone Messages"},{"location":"features/phone-messages/#phone-messages","text":"Sensor parameters description for [PHONE_MESSAGES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the messages data is stored","title":"Phone Messages"},{"location":"features/phone-messages/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_messages_raw.csv - data/raw/ { pid } /phone_messages_with_datetime.csv - data/interim/ { pid } /phone_messages_features/phone_messages_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_messages.csv Parameters description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_MESSAGES features from the RAPIDS provider [MESSAGES_TYPES] The messages_type that will be analyzed. The options for this parameter are received or sent . [FEATURES] Features to be computed, see table below for [MESSAGES_TYPES] received and sent Features description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Feature Units Description count messages Number of messages of type messages_type that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular messages_type during a particular time_segment . timefirstmessages minutes Number of minutes between 12:00am (midnight) and the first message of a particular messages_type during a particular time_segment . 
timelastmessages minutes Number of minutes between 12:00am (midnight) and the last message of a particular messages_type during a particular time_segment . countmostfrequentcontact messages Number of messages from the contact with the most messages of messages_type during a time_segment throughout the whole dataset of each participant. Assumptions/Observations [MESSAGES_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [MESSAGES_TYPES] sent matches the [FEATURES] key sent","title":"RAPIDS provider"},{"location":"features/phone-screen/","text":"Phone Screen \u00b6 Sensor parameters description for [PHONE_SCREEN] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the screen data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_screen_raw.csv - data/raw/ { pid } /phone_screen_with_datetime.csv - data/interim/ { pid } /phone_screen_episodes.csv - data/interim/ { pid } /phone_screen_episodes_resampled.csv - data/interim/ { pid } /phone_screen_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_screen_features/phone_screen_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_screen.csv Parameters description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_SCREEN features from the RAPIDS provider [FEATURES] Features to be computed, see table below [REFERENCE_HOUR_FIRST_USE] The reference point from which firstuseafter is to be computed, default is midnight [IGNORE_EPISODES_SHORTER_THAN] Ignore episodes that are shorter than this threshold (minutes). Set to 0 to disable this filter. [IGNORE_EPISODES_LONGER_THAN] Ignore episodes that are longer than this threshold (minutes). Set to 0 to disable this filter. 
[EPISODE_TYPES] Currently we only support unlock episodes (from when the phone is unlocked until the screen is off) Features description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Feature Units Description sumduration minutes Total duration of all unlock episodes. maxduration minutes Longest duration of any unlock episode. minduration minutes Shortest duration of any unlock episode. avgduration minutes Average duration of all unlock episodes. stdduration minutes Standard deviation duration of all unlock episodes. countepisode episodes Number of all unlock episodes firstuseafter minutes Minutes until the first unlock episode. Assumptions/Observations In Android, lock events can happen right after an off event, after a few seconds of an off event, or never happen depending on the phone's settings, therefore, an unlock episode is defined as the time between an unlock and an off event. In iOS, on and off events do not exist, so an unlock episode is defined as the time between an unlock and a lock event. Events in iOS are recorded reliably albeit some duplicated lock events within milliseconds from each other, so we only keep consecutive unlock/lock pairs. In Android you can find multiple consecutive unlock or lock events, so we only keep consecutive unlock/off pairs. In our experiments these cases are less than 10% of the screen events collected and this happens because ACTION_SCREEN_OFF and ACTION_SCREEN_ON are sent when the device becomes non-interactive which may have nothing to do with the screen turning off . In addition to unlock/off episodes, in Android it is possible to measure the time spent on the lock screen before an unlock event as well as the total screen time (i.e. ON to OFF ) but these are not implemented at the moment. We transform iOS screen events to match Android\u2019s format, we replace lock episodes with off episodes (2 with 0) in iOS. 
However, as mentioned above this is still computing unlock to lock episodes.","title":"Phone Screen"},{"location":"features/phone-screen/#phone-screen","text":"Sensor parameters description for [PHONE_SCREEN] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the screen data is stored","title":"Phone Screen"},{"location":"features/phone-screen/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_screen_raw.csv - data/raw/ { pid } /phone_screen_with_datetime.csv - data/interim/ { pid } /phone_screen_episodes.csv - data/interim/ { pid } /phone_screen_episodes_resampled.csv - data/interim/ { pid } /phone_screen_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_screen_features/phone_screen_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_screen.csv Parameters description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_SCREEN features from the RAPIDS provider [FEATURES] Features to be computed, see table below [REFERENCE_HOUR_FIRST_USE] The reference point from which firstuseafter is to be computed, default is midnight [IGNORE_EPISODES_SHORTER_THAN] Ignore episodes that are shorter than this threshold (minutes). Set to 0 to disable this filter. [IGNORE_EPISODES_LONGER_THAN] Ignore episodes that are longer than this threshold (minutes). Set to 0 to disable this filter. [EPISODE_TYPES] Currently we only support unlock episodes (from when the phone is unlocked until the screen is off) Features description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Feature Units Description sumduration minutes Total duration of all unlock episodes. maxduration minutes Longest duration of any unlock episode. minduration minutes Shortest duration of any unlock episode. avgduration minutes Average duration of all unlock episodes. 
stdduration minutes Standard deviation duration of all unlock episodes. countepisode episodes Number of all unlock episodes firstuseafter minutes Minutes until the first unlock episode. Assumptions/Observations In Android, lock events can happen right after an off event, after a few seconds of an off event, or never happen depending on the phone's settings, therefore, an unlock episode is defined as the time between an unlock and an off event. In iOS, on and off events do not exist, so an unlock episode is defined as the time between an unlock and a lock event. Events in iOS are recorded reliably albeit some duplicated lock events within milliseconds from each other, so we only keep consecutive unlock/lock pairs. In Android you can find multiple consecutive unlock or lock events, so we only keep consecutive unlock/off pairs. In our experiments these cases are less than 10% of the screen events collected and this happens because ACTION_SCREEN_OFF and ACTION_SCREEN_ON are sent when the device becomes non-interactive which may have nothing to do with the screen turning off . In addition to unlock/off episodes, in Android it is possible to measure the time spent on the lock screen before an unlock event as well as the total screen time (i.e. ON to OFF ) but these are not implemented at the moment. We transform iOS screen events to match Android\u2019s format, we replace lock episodes with off episodes (2 with 0) in iOS. However, as mentioned above this is still computing unlock to lock episodes.","title":"RAPIDS provider"},{"location":"features/phone-wifi-connected/","text":"Phone WiFi Connected \u00b6 Sensor parameters description for [PHONE_WIFI_CONNECTED] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (connected) data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_wifi_connected_raw.csv - data/raw/ { pid } /phone_wifi_connected_with_datetime.csv - data/interim/ { pid } /phone_wifi_connected_features/phone_wifi_connected_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_connected.csv Parameters description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_CONNECTED features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points connected during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A connected WiFI access point is one that a phone was connected to. By default AWARE stores this data in the sensor_wifi table.","title":"Phone WiFI Connected"},{"location":"features/phone-wifi-connected/#phone-wifi-connected","text":"Sensor parameters description for [PHONE_WIFI_CONNECTED] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (connected) data is stored","title":"Phone WiFi Connected"},{"location":"features/phone-wifi-connected/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_wifi_connected_raw.csv - data/raw/ { pid } /phone_wifi_connected_with_datetime.csv - data/interim/ { pid } /phone_wifi_connected_features/phone_wifi_connected_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_connected.csv Parameters description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_CONNECTED features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points connected during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A connected WiFI access point is one that a phone was connected to. By default AWARE stores this data in the sensor_wifi table.","title":"RAPIDS provider"},{"location":"features/phone-wifi-visible/","text":"Phone WiFi Visible \u00b6 Sensor parameters description for [PHONE_WIFI_VISIBLE] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (visible) data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_wifi_visible_raw.csv - data/raw/ { pid } /phone_wifi_visible_with_datetime.csv - data/interim/ { pid } /phone_wifi_visible_features/phone_wifi_visible_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_visible.csv Parameters description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_VISIBLE features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points visible during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A visible WiFI access point is one that a phone sensed around itself but that it was not connected to. Due to API restrictions, this sensor is not available on iOS. By default AWARE stores this data in the wifi table.","title":"Phone WiFI Visible"},{"location":"features/phone-wifi-visible/#phone-wifi-visible","text":"Sensor parameters description for [PHONE_WIFI_VISIBLE] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (visible) data is stored","title":"Phone WiFi Visible"},{"location":"features/phone-wifi-visible/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_wifi_visible_raw.csv - data/raw/ { pid } /phone_wifi_visible_with_datetime.csv - data/interim/ { pid } /phone_wifi_visible_features/phone_wifi_visible_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_visible.csv Parameters description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_VISIBLE features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points visible during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A visible WiFI access point is one that a phone sensed around itself but that it was not connected to. Due to API restrictions, this sensor is not available on iOS. 
By default AWARE stores this data in the wifi table.","title":"RAPIDS provider"},{"location":"setup/configuration/","text":"Configuration \u00b6 You need to follow these steps to configure your RAPIDS deployment before you can extract behavioral features Verify RAPIDS can process your data streams Create your participants files Select what time segments you want to extract features on Choose the timezone of your study Configure your data streams Select what sensors and features you want to process When you are done with this configuration, go to executing RAPIDS . Hint Every time you see config[\"KEY\"] or [KEY] in these docs we are referring to the corresponding key in the config.yaml file. Supported data streams \u00b6 A data stream refers to sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Check the table in introduction to data streams to know what data streams we support. If your data stream is supported, continue to the next configuration section, you will use its label later in this guide (e.g. aware_mysql ). If your stream is not supported but you want to implement it, follow the tutorial to add support for new data streams and get in touch by email or in Slack if you have any questions. Participant files \u00b6 Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or automatically . Participant files are stored in data/external/participant_files/pxx.yaml and follow a unified structure . Remember to modify the config.yaml file with your PIDS The list PIDS in config.yaml needs to have the participant file names of the people you want to process. 
For example, if you created p01.yaml , p02.yaml and p03.yaml files in /data/external/participant_files/ , then PIDS should be: PIDS : [ p01 , p02 , p03 ] Optional: Migrating participants files with the old format If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in data/external/participant_files/ python tools/update_format_participant_files.py Structure of participants files \u00b6 Example of the structure of a participant file In this example, the participant used an android phone, an ios phone, a fitbit device, and an Empatica device throughout the study between Apr 23 rd 2020 and Oct 28 th 2020 If your participants didn\u2019t use a [PHONE] , [FITBIT] or [EMPATICA] device, it is not necessary to include that section in their participant file. In other words, you can analyse data from 1 or more devices per participant. PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 , dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ] PLATFORMS : [ android , ios ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 FITBIT : DEVICE_IDS : [ fitbit1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 EMPATICA : DEVICE_IDS : [ empatica1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 [PHONE] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study. [PLATFORMS] An array that specifies the OS of each smartphone in [DEVICE_IDS] , use a combination of android or ios (we support participants that changed platforms in the middle of your study!). You can set [PLATFORMS]: [infer] and RAPIDS will infer them automatically (each phone data stream infer this differently, e.g. aware_mysql uses the aware_device table). [LABEL] A string that is used in reports and visualizations. 
[START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [FITBIT] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study. [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [EMPATICA] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Empatica device used by this participant. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . 
Automatic creation of participant files \u00b6 You can use a CSV file with a row per participant to automatically create participant files. AWARE_DEVICE_TABLE was deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions below: SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; In your config.yaml : Set CSV_FILE_PATH to a CSV file path that complies with the specs described below Set the devices ( PHONE , FITBIT , EMPATICA ) [ADD] flag to TRUE depending on what devices you used in your study. CREATE_PARTICIPANT_FILES : CSV_FILE_PATH : \"your_path/to_your.csv\" PHONE_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] FITBIT_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] EMPATICA_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] Your CSV file ( [CSV_FILE_PATH] ) should have the following columns (headers) but the values within each column can be empty: Column Description device_id Phone device id. Separate multiple ids with ; fitbit_id Fitbit device id. Separate multiple ids with ; empatica_id Empatica device id. 
Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) pid Unique identifiers with the format pXXX (your participant files will be named with this string) platform Use android , ios or infer as explained above, separate values with ; label A human readable string that is used in reports and visualizations. start_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . end_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . Example We added white spaces to this example to make it easy to read but you don\u2019t have to. device_id ,fitbit_id, empatica_id ,pid ,label ,platform ,start_date ,end_date a748ee1a-1d0b-4ae9-9074-279a2b6ba524;dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ,fitbit1 , p01 ,p01 ,julio ,android;ios ,2020-01-01 ,2021-01-01 4c4cf7a1-0340-44bc-be0f-d5053bf7390c ,fitbit2 , p02 ,p02 ,meng ,ios ,2021-01-01 ,2022-01-01 Then run snakemake -j1 create_participants_files Time Segments \u00b6 Time segments (or epochs) are the time windows on which you want to extract behavioral features. For example, you might want to process data on every day, every morning, or only during weekends. RAPIDS offers three categories of time segments that are flexible enough to cover most use cases: frequency (short time windows every day), periodic (arbitrary time windows on any day), and event (arbitrary time windows around events of interest). See also our examples . Frequency Segments These segments are computed on every day and all have the same duration (for example 30 minutes). 
Set the following keys in your config.yaml TIME_SEGMENTS : &time_segments TYPE : FREQUENCY FILE : \"data/external/your_frequency_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can only have 1 row. Column Description label A string that is used as a prefix in the name of your time segments length An integer representing the duration of your time segments in minutes Example label,length thirtyminutes,30 This configuration will compute 48 time segments for every day when any data from any participant was sensed. For example: start_time,length,label 00:00,30,thirtyminutes0000 00:30,30,thirtyminutes0001 01:00,30,thirtyminutes0002 01:30,30,thirtyminutes0003 ... Periodic Segments These segments can be computed every day, or on specific days of the week, month, quarter, and year. Their minimum duration is 1 minute but they can be as long as you want. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : PERIODIC FILE : \"data/external/your_periodic_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE If [INCLUDE_PAST_PERIODIC_SEGMENTS] is set to TRUE , RAPIDS will consider instances of your segments back enough in the past as to include the first row of data of each participant. For example, if the first row of data from a participant happened on Saturday March 7 th 2020 and the requested segment duration is 7 days starting on every Sunday, the first segment to be considered would start on Sunday March 1 st if [INCLUDE_PAST_PERIODIC_SEGMENTS] is TRUE or on Sunday March 8 th if FALSE . The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
It has to be unique between rows start_time A string with format HH:MM:SS representing the starting time of this segment on any day length A string representing the length of this segment. It can have one or more of the following strings XXD XXH XXM XXS to represent days, hours, minutes and seconds. For example 7D 23H 59M 59S repeats_on One of the following options every_day , wday , qday , mday , and yday . The last four represent a week, quarter, month and year day repeats_value An integer complementing repeats_on . If you set repeats_on to every_day set this to 0 , otherwise 1-7 represent a wday starting from Mondays, 1-31 represent a mday , 1-91 represent a qday , and 1-366 represent a yday Example label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 morning,06:00:00,5H 59M 59S,every_day,0 afternoon,12:00:00,5H 59M 59S,every_day,0 evening,18:00:00,5H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 This configuration will create five segment instances ( daily , morning , afternoon , evening , night ) on any given day ( every_day set to 0). The daily segment will start at midnight and will last 23:59:59 , the other four segments will start at 6am, 12pm, 6pm, and 12am respectively and last for 05:59:59 . Event segments These segments can be computed before or after an event of interest (defined as any UNIX timestamp). Their minimum duration is 1 minute but they can be as long as you want. The start of each segment can be shifted backwards or forwards from the specified timestamp. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : EVENT FILE : \"data/external/your_event_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing auxiliary data for features like the most frequent contact for calls (the most frequent contact will be computed across all these segments). There cannot be two overlapping event segments with the same label (RAPIDS will throw an error) event_timestamp A UNIX timestamp that represents the moment an event of interest happened (clinical relapse, survey, readmission, etc.). The corresponding time segment will be computed around this moment using length , shift , and shift_direction length A string representing the length of this segment. It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes, and seconds. For example 7D 23H 59M 59S shift A string representing the time shift from event_timestamp . It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes and seconds. For example 7D 23H 59M 59S . Use this value to change the start of a segment with respect to its event_timestamp . For example, set this variable to 1H to create a segment that starts 1 hour from an event of interest ( shift_direction determines if it\u2019s before or after). shift_direction An integer representing whether the shift is before ( -1 ) or after ( 1 ) an event_timestamp device_id The device id (smartphone or fitbit) to which this segment belongs. You have to create a line in this event segment file for each event of a participant that you want to analyse. 
If you have participants with multiple device ids you can choose any of them Example label,event_timestamp,length,shift,shift_direction,device_id stress1,1587661220000,1H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress2,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress3,1587906020000,3H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress4,1584291600000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress5,1588172420000,9H,5M,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 This example will create eight segments for a single participant ( a748ee1a... ), five independent stressX segments with various lengths (1,4,3,7, and 9 hours). Segments stress1 , stress3 , and stress5 are shifted forwards by 5 minutes and stress2 and stress4 are shifted backwards by 4 hours (that is, if the stress4 event happened on March 15 th at 1pm EST ( 1584291600000 ), the time segment will start on that day at 9am and end at 4pm). The three mood segments are 1 hour, 1 day and 7 days long and have no shift. In addition, these mood segments are grouped together, meaning that although RAPIDS will compute features on each one of them, some necessary information to compute a few of such features will be extracted from all three segments, for example the phone contact that called a participant the most or the location clusters visited by a participant. Date time labels of event segments In the final feature file, you will find a row per event segment. The local_segment column of each row has a label , a start date-time string, and an end date-time string. weeklysurvey2060#2020-09-12 01 :00:00,2020-09-18 23 :59:59 All sensor data is always segmented based on timestamps, and the date-time strings are attached for informative purposes. For example, you can plot your features based on these strings. 
When you configure RAPIDS to work with a single time zone, such tz code will be used to convert start/end timestamps (the ones you typed in the event segments file) into start/end date-time strings. However, when you configure RAPIDS to work with multiple time zones, RAPIDS will use the most common time zone across all devices of every participant to do the conversion. The most common time zone is the one in which a participant spent the most time. In practical terms, this means that the date-time strings of event segments that happened in uncommon time zones will have shifted start/end date-time labels. However, the data within each segment was correctly filtered based on timestamps. Segment Examples \u00b6 5-minutes Use the following Frequency segment file to create 288 (12 * 24) 5-minute segments starting from midnight of every day in your study label,length fiveminutes,5 Daily Use the following Periodic segment file to create daily segments starting from midnight of every day in your study label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 Morning Use the following Periodic segment file to create morning segments starting at 06:00:00 and ending at 11:59:59 of every day in your study label,start_time,length,repeats_on,repeats_value morning,06:00:00,5H 59M 59S,every_day,0 Overnight Use the following Periodic segment file to create overnight segments starting at 20:00:00 and ending at 07:59:59 (next day) of every day in your study label,start_time,length,repeats_on,repeats_value morning,20:00:00,11H 59M 59S,every_day,0 Weekly Use the following Periodic segment file to create non-overlapping weekly segments starting at midnight of every Monday in your study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,wday,1 Use the following Periodic segment file to create overlapping weekly segments starting at midnight of every day in your study label,start_time,length,repeats_on,repeats_value 
weekly,00:00:00,6D 23H 59M 59S,every_day,0 Week-ends Use the following Periodic segment file to create week-end segments starting at midnight of every Saturday in your study label,start_time,length,repeats_on,repeats_value weekend,00:00:00,1D 23H 59M 59S,wday,6 Around surveys Use the following Event segment file to create two 2-hour segments that start 1 hour before surveys answered by 3 participants label,event_timestamp,length,shift,shift_direction,device_id survey1,1587661220000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2,1587747620000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey1,1587906020000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey2,1584291600000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey1,1588172420000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 survey2,1584291600000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 Timezone of your study \u00b6 Single timezone \u00b6 If your study only happened in a single time zone or you want to ignore short trips of your participants to different time zones, select the appropriate code from this list and change the following config key. Double-check your timezone code pick, for example, US Eastern Time is America/New_York not EST TIMEZONE : TYPE : SINGLE TZCODE : America/New_York Multiple timezones \u00b6 If your participants lived in different time zones or they traveled across time zones, and you know when participants\u2019 devices were in a specific time zone, RAPIDS can use this data to process your data streams with the correct date-time. You need to provide RAPIDS with the time zone data in a CSV file ( [TZCODES_FILE] ) in the format described below. 
TIMEZONE : TYPE : MULTIPLE SINGLE : TZCODE : America/New_York MULTIPLE : TZCODES_FILE : path_to/time_zones_csv.file IF_MISSING_TZCODE : STOP DEFAULT_TZCODE : America/New_York FITBIT : ALLOW_MULTIPLE_TZ_PER_DEVICE : False INFER_FROM_SMARTPHONE_TZ : False Parameters for [TIMEZONE] Parameter Description [TYPE] Either SINGLE or MULTIPLE as explained above [SINGLE][TZCODE] The time zone code from this list to be used across all devices [MULTIPLE][TZCODES_FILE] A CSV file containing the time zones in which participants\u2019 devices sensed data (see the required format below). Multiple devices can be linked to the same person, read more in Participants Files [MULTIPLE][IF_MISSING_TZCODE] When a device is missing from [TZCODES_FILE] Set this flag to STOP to stop RAPIDS execution and show an error, or to USE_DEFAULT to assign the time zone specified in [DEFAULT_TZCODE] to any such devices [MULTIPLE][FITBIT][ALLOW_MULTIPLE_TZ_PER_DEVICE] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. [MULTIPLE][FITBIT][INFER_FROM_SMARTPHONE_TZ] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. 
Format of TZCODES_FILE TZCODES_FILE has three columns and a row for each time zone a device visited (a device can be a smartphone or wearable (Fitbit/Empatica)): Column Description device_id A string that uniquely identifies a smartphone or wearable tzcode A string with the appropriate code from this list that represents the time zone where the device sensed data timestamp A UNIX timestamp indicating when was the first time this device_id sensed data in tzcode device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/New_York, 1587500000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Mexico_City, 1587600000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 1587700000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587100000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Berlin, 1587200000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587300000000 Using this file, RAPIDS will create time zone intervals per device, for example for 13dbc8a3-dae3-4834-823a-4bc96a7d459d : Interval 1 [1587500000000, 1587599999999] for America/New_York Interval 2 [1587600000000, 1587699999999] for America/Mexico_City Interval 3 [1587700000000, now] for America/Los_Angeles Any sensor data row from a device will be assigned a timezone if it falls within that interval, for example: A screen row sensed at 1587533333333 will be assigned to America/New_York because it falls within Interval 1 A screen row sensed at 1587400000000 will be discarded because it was logged outside any interval. Can I get the TZCODES_FILE from the time zone table collected automatically by the AWARE app? Sure. You can put your timezone table ( timezone.csv ) collected by the AWARE app under data/external folder and run: python tools/create_multi_timezones_file.py The TZCODES_FILE will be saved as data/external/multiple_timezones.csv file. 
What happens if participant X lives in Los Angeles but participant Y lives in Amsterdam and they both stayed there during my study? Add a row per participant and set timestamp to 0 : device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 0 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 0 What happens if I forget to add a timezone for one or more devices? It depends on [IF_MISSING_TZCODE] . If [IF_MISSING_TZCODE] is set to STOP , RAPIDS will stop its execution and show you an error message. If [IF_MISSING_TZCODE] is set to USE_DEFAULT , it will assign the time zone specified in [DEFAULT_TZCODE] to any devices with missing time zone information in [TZCODES_FILE] . This is helpful if only a few of your participants had multiple timezones and you don\u2019t want to specify the same time zone for the rest. How does RAPIDS handle Fitbit devices? Fitbit devices are not time zone aware and they always log data with a local date-time string. When none of the Fitbit devices in your study changed time zones (e.g., p01 was always in New York and p02 was always in Amsterdam), you can set a single time zone per Fitbit device id along with a timestamp 0 (you can still assign multiple time zones to smartphone device ids) device_id, tzcode, timestamp fitbit123, America/New_York, 0 fitbit999, Europe/Amsterdam, 0 On the other hand, when at least one of your Fitbit devices changed time zones AND you want RAPIDS to take into account these changes, you need to set [ALLOW_MULTIPLE_TZ_PER_DEVICE] to True . You have to manually allow this option because you need to be aware it can produce inaccurate features around the times when time zones changed . This is because we cannot know exactly when the Fitbit device detected and processed the time zone change. If you want to ALLOW_MULTIPLE_TZ_PER_DEVICE you will need to add any time zone changes per device in the TZCODES_FILE as explained above. 
You could obtain this data by hand but if your participants also used a smartphone during your study, you can use their time zone logs. Recall that in RAPIDS every participant is represented with a participant file pXX.yaml , this file links together multiple devices and we will use it to know what smartphone time zone data should be applied to Fitbit devices. Thus set INFER_FROM_SMARTPHONE_TZ to TRUE , if you have included smartphone time zone data in your TZCODES_FILE and you want to make a participant\u2019s Fitbit data time zone aware with their respective smartphone data. Data Stream Configuration \u00b6 Modify the following keys in your config.yaml depending on the data stream you want to process. Phone Set [PHONE_DATA_STREAMS][TYPE] to the smartphone data stream you want to process (e.g. aware_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of streams you are not using (e.g. [FOLDER] of aware_csv ). PHONE_DATA_STREAMS : USE : aware_mysql # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP aware_csv : FOLDER : data/external/aware_csv aware_mysql Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. aware_csv Key Description [FOLDER] Folder where you have to place a CSV file per phone sensor. Each file has to contain all the data from every participant you want to process. Fitbit Set [FITBIT_DATA_STREAMS][TYPE] to the Fitbit data stream you want to process (e.g. fitbitjson_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of the other streams you are not using (e.g. [FOLDER] of aware_csv ). Warning You will probably have to tell RAPIDS the name of the columns where you stored your Fitbit data. To do this, modify your chosen stream\u2019s format.yaml column mappings to match your raw data column names. 
FITBIT_DATA_STREAMS : USE : fitbitjson_mysql # AVAILABLE: fitbitjson_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_mysql This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitjson_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitjson_csv This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a CSV file. Read more about its column mappings and mutations in fitbitjson_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). fitbitparsed_mysql This data stream process Fitbit data stored in multiple columns after being parsed from the JSON column returned by Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitparsed_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitparsed_csv This data stream process Fitbit data stored in multiple columns (plain text) after being parsed from the JSON column returned by Fitbit API and stored in a CSV file. 
Read more about its column mappings and mutations in fitbitparsed_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Empatica Set [USE] to the Empatica data stream you want to use; see the table in introduction to data streams . Configure any parameters as indicated below. EMPATICA_DATA_STREAMS : USE : empatica_zip # AVAILABLE: empatica_zip : FOLDER : data/external/empatica empatica_zip Key Description [FOLDER] The relative path to a folder containing one subfolder per participant. The name of a participant folder should match their device_id assigned in their participant file. Each participant folder can have one or more zip files with any name; in other words, the sensor data in those zip files belong to a single participant. The zip files are automatically generated by Empatica and have a CSV file per sensor ( ACC , HR , TEMP , EDA , BVP , TAGS ). All CSV files of the same type contained in one or more zip files are uncompressed, parsed, sorted by timestamp, and joined together. Example of an EMPATICA FOLDER In the file tree below, we want to process three participants\u2019 data: p01 , p02 , and p03 . p01 has two zip files, p02 has only one zip file, and p03 has three zip files. Each zip has a CSV file per sensor that are joined together and processed by RAPIDS. data/ # this folder exists in the root RAPIDS folder external/ empatica/ p01/ file1.zip file2.zip p02/ aaaa.zip p03/ t1.zip t2.zip t3.zip Sensor and Features to Process \u00b6 Finally, you need to modify the config.yaml section of the sensors you want to extract behavioral features from. 
All sensors follow the same naming nomenclature ( DEVICE_SENSOR ) and parameter structure which we explain in the Behavioral Features Introduction . Done Head over to Execution to learn how to execute RAPIDS.","title":"Configuration"},{"location":"setup/configuration/#configuration","text":"You need to follow these steps to configure your RAPIDS deployment before you can extract behavioral features Verify RAPIDS can process your data streams Create your participants files Select what time segments you want to extract features on Choose the timezone of your study Configure your data streams Select what sensors and features you want to process When you are done with this configuration, go to executing RAPIDS . Hint Every time you see config[\"KEY\"] or [KEY] in these docs we are referring to the corresponding key in the config.yaml file.","title":"Configuration"},{"location":"setup/configuration/#supported-data-streams","text":"A data stream refers to sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Check the table in introduction to data streams to know what data streams we support. If your data stream is supported, continue to the next configuration section, you will use its label later in this guide (e.g. aware_mysql ). If your stream is not supported but you want to implement it, follow the tutorial to add support for new data streams and get in touch by email or in Slack if you have any questions.","title":"Supported data streams"},{"location":"setup/configuration/#participant-files","text":"Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or automatically . 
Participant files are stored in data/external/participant_files/pxx.yaml and follow a unified structure . Remember to modify the config.yaml file with your PIDS The list PIDS in config.yaml needs to have the participant file names of the people you want to process. For example, if you created p01.yaml , p02.yaml and p03.yaml files in /data/external/participant_files/ , then PIDS should be: PIDS : [ p01 , p02 , p03 ] Optional: Migrating participants files with the old format If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in data/external/participant_files/ python tools/update_format_participant_files.py","title":"Participant files"},{"location":"setup/configuration/#structure-of-participants-files","text":"Example of the structure of a participant file In this example, the participant used an android phone, an ios phone, a fitbit device, and an Empatica device throughout the study between Apr 23 rd 2020 and Oct 28 th 2020 If your participants didn\u2019t use a [PHONE] , [FITBIT] or [EMPATICA] device, it is not necessary to include that section in their participant file. In other words, you can analyse data from 1 or more devices per participant. PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 , dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ] PLATFORMS : [ android , ios ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 FITBIT : DEVICE_IDS : [ fitbit1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 EMPATICA : DEVICE_IDS : [ empatica1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 [PHONE] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study. 
[PLATFORMS] An array that specifies the OS of each smartphone in [DEVICE_IDS] , use a combination of android or ios (we support participants that changed platforms in the middle of your study!). You can set [PLATFORMS]: [infer] and RAPIDS will infer them automatically (each phone data stream infers this differently, e.g. aware_mysql uses the aware_device table). [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [FITBIT] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study. [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [EMPATICA] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Empatica device used by this participant. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . 
Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 .","title":"Structure of participants files"},{"location":"setup/configuration/#automatic-creation-of-participant-files","text":"You can use a CSV file with a row per participant to automatically create participant files. AWARE_DEVICE_TABLE was deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions below: SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; In your config.yaml : Set CSV_FILE_PATH to a CSV file path that complies with the specs described below Set the devices ( PHONE , FITBIT , EMPATICA ) [ADD] flag to TRUE depending on what devices you used in your study. CREATE_PARTICIPANT_FILES : CSV_FILE_PATH : \"your_path/to_your.csv\" PHONE_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] FITBIT_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] EMPATICA_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] Your CSV file ( [CSV_FILE_PATH] ) should have the following columns (headers) but the values within each column can be empty: Column Description device_id Phone device id. Separate multiple ids with ; fitbit_id Fitbit device id. 
Separate multiple ids with ; empatica_id Empatica device id. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) pid Unique identifiers with the format pXXX (your participant files will be named with this string) platform Use android , ios or infer as explained above, separate values with ; label A human readable string that is used in reports and visualizations. start_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . end_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . Example We added white spaces to this example to make it easy to read but you don\u2019t have to. device_id ,fitbit_id, empatica_id ,pid ,label ,platform ,start_date ,end_date a748ee1a-1d0b-4ae9-9074-279a2b6ba524;dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ,fitbit1 , p01 ,p01 ,julio ,android;ios ,2020-01-01 ,2021-01-01 4c4cf7a1-0340-44bc-be0f-d5053bf7390c ,fitbit2 , p02 ,p02 ,meng ,ios ,2021-01-01 ,2022-01-01 Then run snakemake -j1 create_participants_files","title":"Automatic creation of participant files"},{"location":"setup/configuration/#time-segments","text":"Time segments (or epochs) are the time windows on which you want to extract behavioral features. For example, you might want to process data on every day, every morning, or only during weekends. RAPIDS offers three categories of time segments that are flexible enough to cover most use cases: frequency (short time windows every day), periodic (arbitrary time windows on any day), and event (arbitrary time windows around events of interest). See also our examples . Frequency Segments These segments are computed on every day and all have the same duration (for example 30 minutes). 
Set the following keys in your config.yaml TIME_SEGMENTS : &time_segments TYPE : FREQUENCY FILE : \"data/external/your_frequency_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can only have 1 row. Column Description label A string that is used as a prefix in the name of your time segments length An integer representing the duration of your time segments in minutes Example label,length thirtyminutes,30 This configuration will compute 48 time segments for every day when any data from any participant was sensed. For example: start_time,length,label 00:00,30,thirtyminutes0000 00:30,30,thirtyminutes0001 01:00,30,thirtyminutes0002 01:30,30,thirtyminutes0003 ... Periodic Segments These segments can be computed every day, or on specific days of the week, month, quarter, and year. Their minimum duration is 1 minute but they can be as long as you want. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : PERIODIC FILE : \"data/external/your_periodic_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE If [INCLUDE_PAST_PERIODIC_SEGMENTS] is set to TRUE , RAPIDS will consider instances of your segments back enough in the past as to include the first row of data of each participant. For example, if the first row of data from a participant happened on Saturday March 7 th 2020 and the requested segment duration is 7 days starting on every Sunday, the first segment to be considered would start on Sunday March 1 st if [INCLUDE_PAST_PERIODIC_SEGMENTS] is TRUE or on Sunday March 8 th if FALSE . The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
It has to be unique between rows start_time A string with format HH:MM:SS representing the starting time of this segment on any day length A string representing the length of this segment. It can have one or more of the following strings XXD XXH XXM XXS to represent days, hours, minutes and seconds. For example 7D 23H 59M 59S repeats_on One of the following options every_day , wday , qday , mday , and yday . The last four represent a week, quarter, month and year day repeats_value An integer complementing repeats_on . If you set repeats_on to every_day set this to 0 , otherwise 1-7 represent a wday starting from Mondays, 1-31 represent a mday , 1-91 represent a qday , and 1-366 represent a yday Example label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 morning,06:00:00,5H 59M 59S,every_day,0 afternoon,12:00:00,5H 59M 59S,every_day,0 evening,18:00:00,5H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 This configuration will create five segments instances ( daily , morning , afternoon , evening , night ) on any given day ( every_day set to 0). The daily segment will start at midnight and will last 23:59:59 , the other four segments will start at 6am, 12pm, 6pm, and 12am respectively and last for 05:59:59 . Event segments These segments can be computed before or after an event of interest (defined as any UNIX timestamp). Their minimum duration is 1 minute but they can be as long as you want. The start of each segment can be shifted backwards or forwards from the specified timestamp. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : EVENT FILE : \"data/external/your_event_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing auxiliary data for features like the most frequent contact for calls (the most frequent contact will be computed across all these segments). There cannot be two overlapping event segments with the same label (RAPIDS will throw an error) event_timestamp A UNIX timestamp that represents the moment an event of interest happened (clinical relapse, survey, readmission, etc.). The corresponding time segment will be computed around this moment using length , shift , and shift_direction length A string representing the length of this segment. It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes, and seconds. For example 7D 23H 59M 59S shift A string representing the time shift from event_timestamp . It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes and seconds. For example 7D 23H 59M 59S . Use this value to change the start of a segment with respect to its event_timestamp . For example, set this variable to 1H to create a segment that starts 1 hour from an event of interest ( shift_direction determines if it\u2019s before or after). shift_direction An integer representing whether the shift is before ( -1 ) or after ( 1 ) an event_timestamp device_id The device id (smartphone or fitbit) to which this segment belongs. You have to create a line in this event segment file for each event of a participant that you want to analyse. 
If you have participants with multiple device ids you can choose any of them Example label,event_timestamp,length,shift,shift_direction,device_id stress1,1587661220000,1H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress2,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress3,1587906020000,3H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress4,1584291600000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress5,1588172420000,9H,5M,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 This example will create eight segments for a single participant ( a748ee1a... ), five independent stressX segments with various lengths (1,4,3,7, and 9 hours). Segments stress1 , stress3 , and stress5 are shifted forwards by 5 minutes and stress2 and stress4 are shifted backwards by 4 hours (that is, if the stress4 event happened on March 15 th at 1pm EST ( 1584291600000 ), the time segment will start on that day at 9am and end at 4pm). The three mood segments are 1 hour, 1 day and 7 days long and have no shift. In addition, these mood segments are grouped together, meaning that although RAPIDS will compute features on each one of them, some necessary information to compute a few of such features will be extracted from all three segments, for example the phone contact that called a participant the most or the location clusters visited by a participant. Date time labels of event segments In the final feature file, you will find a row per event segment. The local_segment column of each row has a label , a start date-time string, and an end date-time string. weeklysurvey2060#2020-09-12 01 :00:00,2020-09-18 23 :59:59 All sensor data is always segmented based on timestamps, and the date-time strings are attached for informative purposes. For example, you can plot your features based on these strings. 
When you configure RAPIDS to work with a single time zone, such tz code will be used to convert start/end timestamps (the ones you typed in the event segments file) into start/end date-time strings. However, when you configure RAPIDS to work with multiple time zones, RAPIDS will use the most common time zone across all devices of every participant to do the conversion. The most common time zone is the one in which a participant spent the most time. In practical terms, this means that the date-time strings of event segments that happened in uncommon time zones will have shifted start/end date-time labels. However, the data within each segment was correctly filtered based on timestamps.","title":"Time Segments"},{"location":"setup/configuration/#segment-examples","text":"5-minutes Use the following Frequency segment file to create 288 (12 * 24) 5-minute segments starting from midnight of every day in your study label,length fiveminutes,5 Daily Use the following Periodic segment file to create daily segments starting from midnight of every day in your study label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 Morning Use the following Periodic segment file to create morning segments starting at 06:00:00 and ending at 11:59:59 of every day in your study label,start_time,length,repeats_on,repeats_value morning,06:00:00,5H 59M 59S,every_day,0 Overnight Use the following Periodic segment file to create overnight segments starting at 20:00:00 and ending at 07:59:59 (next day) of every day in your study label,start_time,length,repeats_on,repeats_value morning,20:00:00,11H 59M 59S,every_day,0 Weekly Use the following Periodic segment file to create non-overlapping weekly segments starting at midnight of every Monday in your study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,wday,1 Use the following Periodic segment file to create overlapping weekly segments starting at midnight of every day in your 
study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,every_day,0 Week-ends Use the following Periodic segment file to create week-end segments starting at midnight of every Saturday in your study label,start_time,length,repeats_on,repeats_value weekend,00:00:00,1D 23H 59M 59S,wday,6 Around surveys Use the following Event segment file to create two 2-hour segments that start 1 hour before surveys answered by 3 participants label,event_timestamp,length,shift,shift_direction,device_id survey1,1587661220000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2,1587747620000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey1,1587906020000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey2,1584291600000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey1,1588172420000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 survey2,1584291600000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3","title":"Segment Examples"},{"location":"setup/configuration/#timezone-of-your-study","text":"","title":"Timezone of your study"},{"location":"setup/configuration/#single-timezone","text":"If your study only happened in a single time zone or you want to ignore short trips of your participants to different time zones, select the appropriate code from this list and change the following config key. Double-check your timezone code pick, for example, US Eastern Time is America/New_York not EST TIMEZONE : TYPE : SINGLE TZCODE : America/New_York","title":"Single timezone"},{"location":"setup/configuration/#multiple-timezones","text":"If your participants lived in different time zones or they traveled across time zones, and you know when participants\u2019 devices were in a specific time zone, RAPIDS can use this data to process your data streams with the correct date-time. You need to provide RAPIDS with the time zone data in a CSV file ( [TZCODES_FILE] ) in the format described below. 
TIMEZONE : TYPE : MULTIPLE SINGLE : TZCODE : America/New_York MULTIPLE : TZCODES_FILE : path_to/time_zones_csv.file IF_MISSING_TZCODE : STOP DEFAULT_TZCODE : America/New_York FITBIT : ALLOW_MULTIPLE_TZ_PER_DEVICE : False INFER_FROM_SMARTPHONE_TZ : False Parameters for [TIMEZONE] Parameter Description [TYPE] Either SINGLE or MULTIPLE as explained above [SINGLE][TZCODE] The time zone code from this list to be used across all devices [MULTIPLE][TZCODES_FILE] A CSV file containing the time zones in which participants\u2019 devices sensed data (see the required format below). Multiple devices can be linked to the same person, read more in Participants Files [MULTIPLE][IF_MISSING_TZCODE] When a device is missing from [TZCODES_FILE] Set this flag to STOP to stop RAPIDS execution and show an error, or to USE_DEFAULT to assign the time zone specified in [DEFAULT_TZCODE] to any such devices [MULTIPLE][FITBIT][ALLOW_MULTIPLE_TZ_PER_DEVICE] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. [MULTIPLE][FITBIT][INFER_FROM_SMARTPHONE_TZ] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. 
Format of TZCODES_FILE TZCODES_FILE has three columns and a row for each time zone a device visited (a device can be a smartphone or wearable (Fitbit/Empatica)): Column Description device_id A string that uniquely identifies a smartphone or wearable tzcode A string with the appropriate code from this list that represents the time zone where the device sensed data timestamp A UNIX timestamp indicating when was the first time this device_id sensed data in tzcode device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/New_York, 1587500000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Mexico_City, 1587600000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 1587700000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587100000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Berlin, 1587200000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587300000000 Using this file, RAPIDS will create time zone intervals per device, for example for 13dbc8a3-dae3-4834-823a-4bc96a7d459d : Interval 1 [1587500000000, 1587599999999] for America/New_York Interval 2 [1587600000000, 1587699999999] for America/Mexico_City Interval 3 [1587700000000, now] for America/Los_Angeles Any sensor data row from a device will be assigned a timezone if it falls within that interval, for example: A screen row sensed at 1587533333333 will be assigned to America/New_York because it falls within Interval 1 A screen row sensed at 1587400000000 will be discarded because it was logged outside any interval. Can I get the TZCODES_FILE from the time zone table collected automatically by the AWARE app? Sure. You can put your timezone table ( timezone.csv ) collected by the AWARE app under data/external folder and run: python tools/create_multi_timezones_file.py The TZCODES_FILE will be saved as data/external/multiple_timezones.csv file. 
What happens if participant X lives in Los Angeles but participant Y lives in Amsterdam and they both stayed there during my study? Add a row per participant and set timestamp to 0 : device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 0 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 0 What happens if I forget to add a timezone for one or more devices? It depends on [IF_MISSING_TZCODE] . If [IF_MISSING_TZCODE] is set to STOP , RAPIDS will stop its execution and show you an error message. If [IF_MISSING_TZCODE] is set to USE_DEFAULT , it will assign the time zone specified in [DEFAULT_TZCODE] to any devices with missing time zone information in [TZCODES_FILE] . This is helpful if only a few of your participants had multiple timezones and you don\u2019t want to specify the same time zone for the rest. How does RAPIDS handle Fitbit devices? Fitbit devices are not time zone aware and they always log data with a local date-time string. When none of the Fitbit devices in your study changed time zones (e.g., p01 was always in New York and p02 was always in Amsterdam), you can set a single time zone per Fitbit device id along with a timestamp 0 (you can still assign multiple time zones to smartphone device ids) device_id, tzcode, timestamp fitbit123, America/New_York, 0 fitbit999, Europe/Amsterdam, 0 On the other hand, when at least one of your Fitbit devices changed time zones AND you want RAPIDS to take into account these changes, you need to set [ALLOW_MULTIPLE_TZ_PER_DEVICE] to True . You have to manually allow this option because you need to be aware it can produce inaccurate features around the times when time zones changed . This is because we cannot know exactly when the Fitbit device detected and processed the time zone change. If you want to ALLOW_MULTIPLE_TZ_PER_DEVICE you will need to add any time zone changes per device in the TZCODES_FILE as explained above. 
You could obtain this data by hand but if your participants also used a smartphone during your study, you can use their time zone logs. Recall that in RAPIDS every participant is represented with a participant file pXX.yaml , this file links together multiple devices and we will use it to know what smartphone time zone data should be applied to Fitbit devices. Thus set INFER_FROM_SMARTPHONE_TZ to TRUE , if you have included smartphone time zone data in your TZCODES_FILE and you want to make a participant\u2019s Fitbit data time zone aware with their respective smartphone data.","title":"Multiple timezones"},{"location":"setup/configuration/#data-stream-configuration","text":"Modify the following keys in your config.yaml depending on the data stream you want to process. Phone Set [PHONE_DATA_STREAMS][TYPE] to the smartphone data stream you want to process (e.g. aware_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of streams you are not using (e.g. [FOLDER] of aware_csv ). PHONE_DATA_STREAMS : USE : aware_mysql # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP aware_csv : FOLDER : data/external/aware_csv aware_mysql Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. aware_csv Key Description [FOLDER] Folder where you have to place a CSV file per phone sensor. Each file has to contain all the data from every participant you want to process. Fitbit Set [FITBIT_DATA_STREAMS][TYPE] to the Fitbit data stream you want to process (e.g. fitbitjson_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of the other streams you are not using (e.g. [FOLDER] of aware_csv ). Warning You will probably have to tell RAPIDS the name of the columns where you stored your Fitbit data. To do this, modify your chosen stream\u2019s format.yaml column mappings to match your raw data column names. 
FITBIT_DATA_STREAMS : USE : fitbitjson_mysql # AVAILABLE: fitbitjson_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_mysql This data stream processes Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitjson_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitjson_csv This data stream processes Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a CSV file. Read more about its column mappings and mutations in fitbitjson_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). fitbitparsed_mysql This data stream processes Fitbit data stored in multiple columns after being parsed from the JSON column returned by Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitparsed_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitparsed_csv This data stream processes Fitbit data stored in multiple columns (plain text) after being parsed from the JSON column returned by Fitbit API and stored in a CSV file. 
Read more about its column mappings and mutations in fitbitparsed_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Empatica Set [USE] to the Empatica data stream you want to use; see the table in introduction to data streams . Configure any parameters as indicated below. EMPATICA_DATA_STREAMS : USE : empatica_zip # AVAILABLE: empatica_zip : FOLDER : data/external/empatica empatica_zip Key Description [FOLDER] The relative path to a folder containing one subfolder per participant. The name of a participant folder should match their device_id assigned in their participant file. Each participant folder can have one or more zip files with any name; in other words, the sensor data in those zip files belong to a single participant. The zip files are automatically generated by Empatica and have a CSV file per sensor ( ACC , HR , TEMP , EDA , BVP , TAGS ). All CSV files of the same type contained in one or more zip files are uncompressed, parsed, sorted by timestamp, and joined together. Example of an EMPATICA FOLDER In the file tree below, we want to process three participants\u2019 data: p01 , p02 , and p03 . p01 has two zip files, p02 has only one zip file, and p03 has three zip files. Each zip has a CSV file per sensor that are joined together and processed by RAPIDS. data/ # this folder exists in the root RAPIDS folder external/ empatica/ p01/ file1.zip file2.zip p02/ aaaa.zip p03/ t1.zip t2.zip t3.zip","title":"Data Stream Configuration"},{"location":"setup/configuration/#sensor-and-features-to-process","text":"Finally, you need to modify the config.yaml section of the sensors you want to extract behavioral features from. 
All sensors follow the same naming nomenclature ( DEVICE_SENSOR ) and parameter structure which we explain in the Behavioral Features Introduction . Done Head over to Execution to learn how to execute RAPIDS.","title":"Sensor and Features to Process"},{"location":"setup/execution/","text":"Execution \u00b6 After you have installed and configured RAPIDS, use the following command to execute it. ./rapids -j1 Ready to extract behavioral features If you are ready to extract features head over to the Behavioral Features Introduction We wrap Snakemake The script ./rapids is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. -j1 ). Updating RAPIDS output after modifying config.yaml Any changes to the config.yaml file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for PHONE_MESSAGE for example, RAPIDS will execute the script that computes MESSAGES features and update its output file. Multi-core You can run RAPIDS over multiple cores by modifying the -j argument (e.g. use -j8 to use 8 cores). However , take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again. As a reference, we have run RAPIDS over 12 cores and 32 GB of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer). 
Deleting RAPIDS output If you want to delete all the output files RAPIDS produces, you can execute the following command: ./rapids -j1 --delete-all-output Forcing a complete rerun or updating your raw data in RAPIDS If you want to update your raw data or rerun the whole pipeline from scratch, run the following commands: ./rapids -j1 --delete-all-output ./rapids -j1","title":"Execution"},{"location":"setup/execution/#execution","text":"After you have installed and configured RAPIDS, use the following command to execute it. ./rapids -j1 Ready to extract behavioral features If you are ready to extract features head over to the Behavioral Features Introduction We wrap Snakemake The script ./rapids is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. -j1 ). Updating RAPIDS output after modifying config.yaml Any changes to the config.yaml file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for PHONE_MESSAGE for example, RAPIDS will execute the script that computes MESSAGES features and update its output file. Multi-core You can run RAPIDS over multiple cores by modifying the -j argument (e.g. use -j8 to use 8 cores). However , take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again. As a reference, we have run RAPIDS over 12 cores and 32 GB of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer). 
Deleting RAPIDS output If you want to delete all the output files RAPIDS produces, you can execute the following command: ./rapids -j1 --delete-all-output Forcing a complete rerun or updating your raw data in RAPIDS If you want to update your raw data or rerun the whole pipeline from scratch, run the following commands: ./rapids -j1 --delete-all-output ./rapids -j1","title":"Execution"},{"location":"setup/installation/","text":"Installation \u00b6 You can install RAPIDS using Docker (the fastest), or native instructions for MacOS and Linux (Ubuntu). Windows is supported through Docker or WSL. Docker Install Docker Pull our RAPIDS container docker pull moshiresearch/rapids:latest Run RAPIDS' container (after this step is done you should see a prompt in the main RAPIDS folder with its python environment active) docker run -it moshiresearch/rapids:latest Pull the latest version of RAPIDS git pull Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Optional . You can edit RAPIDS files with vim but we recommend using Visual Studio Code and its Remote Containers extension How to configure Remote Containers extension Make sure RAPIDS container is running Install the Remote - Containers extension Go to the Remote Explorer panel on the left hand sidebar On the top right dropdown menu choose Containers Double click on the moshiresearch/rapids container in the CONTAINERS tree A new VS Code session should open on RAPIDS main folder inside the container. Warning If you installed RAPIDS using Docker for Windows on Windows 10, the container will have limits on the amount of RAM it can use. If you find that RAPIDS crashes due to running out of memory, increase this limit. MacOS We tested these instructions in Catalina and Big Sur M1 Macs RAPIDS can run on M1 Macs, the only changes as of Feb 21, 2021 are: R needs to be installed via brew under Rosetta (x86 arch) due to incompatibility issues with some R libraries. 
To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. Use x86 brew to install R and restore RAPIDS\u2019 packages ( snakemake -j1 renv_install & snakemake -j1 renv_restore ). There is a bug related to timezone codes. We set the correct TZ_DIR in renv/activate.R (line #19) Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) (RAPIDS does this automatically). Install brew Install MySQL brew install mysql brew services start mysql Install R 4.0, pandoc and rmarkdown. If you have other instances of R, we recommend uninstalling them brew install r brew install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install miniconda (restart your terminal afterwards) brew cask install miniconda conda init zsh # (or conda init bash) Clone our repo git clone https://github.com/carissalow/rapids Create a python virtual environment cd rapids conda env create -f environment.yml -n rapids conda activate rapids Install R packages and virtual environment: snakemake -j1 renv_install snakemake -j1 renv_restore Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Ubuntu We tested RAPIDS on Ubuntu 18.04 & 20.04. Note that the necessary Python and R packages are available in other Linux distributions, so if you decide to give it a try, let us know and we can update these docs. Install dependencies sudo apt install libcurl4-openssl-dev sudo apt install libssl-dev sudo apt install libxml2-dev sudo apt install libglpk40 Install MySQL sudo apt install libmysqlclient-dev sudo apt install mysql-server Add key for R\u2019s repository. 
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 Add R\u2019s repository Ubuntu 18.04 Bionic sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' Ubuntu 20.04 Focal sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' Install R 4.0. If you have other instances of R, we recommend uninstalling them sudo apt update sudo apt install r-base Install Pandoc and rmarkdown sudo apt install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install git sudo apt install git Install miniconda Restart your current shell Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install the R virtual environment management package (renv) snakemake -j1 renv_install Restore the R virtual environment Ubuntu 18.04 Bionic (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/bionic/latest\"))' Ubuntu 20.04 Focal (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/focal/latest\"))' Ubuntu (slow) If the fast installation command failed for some reason, you can restore the R virtual environment from source: R -e 'renv::restore()' Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. 
Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Windows There are several options varying in complexity: You can use our Docker instructions (tested) You can use our Ubuntu 20.04 instructions on WSL2 (not tested but it will likely work) Native installation (experimental). If you would like to contribute to RAPIDS you could try to install MySQL, miniconda, Python, and R 4.0+ in Windows and restore the Python and R virtual environments using steps 6 and 7 of the instructions for Mac. You can get in touch if you would like to discuss this with the team.","title":"Installation"},{"location":"setup/installation/#installation","text":"You can install RAPIDS using Docker (the fastest), or native instructions for MacOS and Linux (Ubuntu). Windows is supported through Docker or WSL. Docker Install Docker Pull our RAPIDS container docker pull moshiresearch/rapids:latest Run RAPIDS' container (after this step is done you should see a prompt in the main RAPIDS folder with its python environment active) docker run -it moshiresearch/rapids:latest Pull the latest version of RAPIDS git pull Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Optional . You can edit RAPIDS files with vim but we recommend using Visual Studio Code and its Remote Containers extension How to configure Remote Containers extension Make sure RAPIDS container is running Install the Remote - Containers extension Go to the Remote Explorer panel on the left hand sidebar On the top right dropdown menu choose Containers Double click on the moshiresearch/rapids container in the CONTAINERS tree A new VS Code session should open on RAPIDS main folder inside the container. Warning If you installed RAPIDS using Docker for Windows on Windows 10, the container will have limits on the amount of RAM it can use. If you find that RAPIDS crashes due to running out of memory, increase this limit. 
MacOS We tested these instructions in Catalina and Big Sur M1 Macs RAPIDS can run on M1 Macs, the only changes as of Feb 21, 2021 are: R needs to be installed via brew under Rosetta (x86 arch) due to incompatibility issues with some R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. Use x86 brew to install R and restore RAPIDS\u2019 packages ( snakemake -j1 renv_install & snakemake -j1 renv_restore ). There is a bug related to timezone codes. We set the correct TZ_DIR in renv/activate.R (line #19) Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) (RAPIDS does this automatically). Install brew Install MySQL brew install mysql brew services start mysql Install R 4.0, pandoc and rmarkdown. If you have other instances of R, we recommend uninstalling them brew install r brew install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install miniconda (restart your terminal afterwards) brew cask install miniconda conda init zsh # (or conda init bash) Clone our repo git clone https://github.com/carissalow/rapids Create a python virtual environment cd rapids conda env create -f environment.yml -n rapids conda activate rapids Install R packages and virtual environment: snakemake -j1 renv_install snakemake -j1 renv_restore Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Ubuntu We tested RAPIDS on Ubuntu 18.04 & 20.04. Note that the necessary Python and R packages are available in other Linux distributions, so if you decide to give it a try, let us know and we can update these docs. 
Install dependencies sudo apt install libcurl4-openssl-dev sudo apt install libssl-dev sudo apt install libxml2-dev sudo apt install libglpk40 Install MySQL sudo apt install libmysqlclient-dev sudo apt install mysql-server Add key for R\u2019s repository. sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 Add R\u2019s repository Ubuntu 18.04 Bionic sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' Ubuntu 20.04 Focal sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' Install R 4.0. If you have other instances of R, we recommend uninstalling them sudo apt update sudo apt install r-base Install Pandoc and rmarkdown sudo apt install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install git sudo apt install git Install miniconda Restart your current shell Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install the R virtual environment management package (renv) snakemake -j1 renv_install Restore the R virtual environment Ubuntu 18.04 Bionic (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/bionic/latest\"))' Ubuntu 20.04 Focal (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/focal/latest\"))' Ubuntu (slow) If the fast installation command failed for some reason, you can restore the R virtual environment from source: R -e 'renv::restore()' Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. 
Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Windows There are several options varying in complexity: You can use our Docker instructions (tested) You can use our Ubuntu 20.04 instructions on WSL2 (not tested but it will likely work) Native installation (experimental). If you would like to contribute to RAPIDS you could try to install MySQL, miniconda, Python, and R 4.0+ in Windows and restore the Python and R virtual environments using steps 6 and 7 of the instructions for Mac. You can get in touch if you would like to discuss this with the team.","title":"Installation"},{"location":"setup/overview/","text":"Overview \u00b6 Let\u2019s review some key concepts we use throughout these docs: Definition Description Data Stream Set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, smartphone (device) data collected with the AWARE Framework (format) and stored in a MySQL database (container). Device A mobile or wearable device, like smartphones, Fitbit wrist bands, Oura Rings, etc. Sensor A physical or digital module built into a device that produces a data stream. For example, a smartphone\u2019s accelerometer or screen. Format A file in RAPIDS that describes how sensor data from a device matches RAPIDS data representation. Container An electronic repository of data, it can be a database, a file, a Web API, etc. RAPIDS connects to containers through container scripts. Participant A person that took part in a monitoring study Behavioral feature A metric computed from raw sensor data quantifying the behavior of a participant. For example, time spent at home computed from location data. These are also known as digital biomarkers Time segment Time segments (or epochs) are the time windows on which RAPIDS extracts behavioral features. 
For example, you might want to compute participants\u2019 time at home every morning or only during weekends. You define time segments in a CSV file that RAPIDS processes. Time zone A string code like America/New_York that represents a time zone where a device logged data. You can process data collected in single or multiple time zones. Provider A script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . config.yaml A YAML file where you can modify parameters to process data streams and behavioral features. This is the heart of RAPIDS and the file that you will modify the most. credentials.yaml A YAML file where you can define credential groups (user, password, host, etc.) if your data stream needs to connect to a database or Web API Participant file(s) A YAML file that links one or more smartphone or wearable devices that a single participant used. RAPIDS needs one file per participant. What can I do with RAPIDS? You can do one or more of these things with RAPIDS: Extract behavioral features from smartphone, Fitbit, and Empatica\u2019s supported data streams Add your own behavioral features (we can include them in RAPIDS if you want to share them with the community) Add support for new data streams if yours cannot be processed by RAPIDS yet Create visualizations for data quality control and feature inspection Extending RAPIDS to organize your analysis and publish a code repository along with your code Hint In order to follow any of the previous tutorials, you will have to Install , Configure , and learn how to Execute RAPIDS. We recommend you follow the Minimal Example tutorial to get familiar with RAPIDS Email us , leave a comment in these docs, create a Github issue or text us in Slack if you have any questions Frequently Asked Questions \u00b6 General \u00b6 What exactly is RAPIDS? 
RAPIDS is a group of configuration files and R and Python scripts that are executed by Snakemake . You can get a copy of RAPIDS by cloning our Github repository. RAPIDS is not a web application or server; all the processing is done in your laptop, server, or computer cluster. How does RAPIDS work? You will most of the time only have to modify configuration files in YAML format ( config.yaml , credentials.yaml , and participant files pxx.yaml ), and in CSV format (time zones and time segments). RAPIDS pulls data from different data containers and processes it in steps. The input/output of each step is saved as a CSV file for inspection; you can check the files that are created for each sensor on its documentation page. All data is stored in data/ , and all processing Python and R scripts are stored in src/ . User and File interactions in RAPIDS In the figure below, we represent the interactions between users and files. After a user modifies the configuration files mentioned above, the Snakefile file will search for and execute the Snakemake rules that contain the Python or R scripts necessary to generate or update the required output files (behavioral features, plots, etc.). Interaction diagram between the user, and important files in RAPIDS Data flow in RAPIDS In the figure below, we represent the flow of data in RAPIDS. In broad terms, smartphone and wearable devices log data streams with a certain format to a data container (database, file, etc.). RAPIDS can connect to these containers if it has a format.yaml and a container.[R|py] script used to pull the correct data and mutate it to comply with RAPIDS\u2019 internal data representation. Once the data stream is in RAPIDS, it goes through some basic transformations (scripts), one that assigns a time segment and a time zone to each data row, and another one that creates \u201cepisodes\u201d of data for some sensors that need it (like screen, battery, activity recognition, and sleep intraday data). 
After this, RAPIDS executes the requested PROVIDER script that computes behavioral features per time segment instance. After every feature is computed, they are joined per sensor, per participant, and study. Visualizations are built based on raw data or based on computed features. Data stream flow in RAPIDS Is my data private? Absolutely, you are processing your data with your own copy of RAPIDS in your laptop, server, or computer cluster, so neither we nor anyone else can have access to your datasets. Do I need to have coding skills to use RAPIDS? If you want to extract the behavioral features or visualizations that RAPIDS offers out of the box, the answer is no. However, you need to be comfortable running commands in your terminal and familiar with editing YAML files and CSV files. If you want to add support for new data streams or behavioral features, you need to be familiar with R or Python. Is RAPIDS open-source or free? Yes, RAPIDS is both open-source and free. How do I cite RAPIDS? Please refer to our Citation guide ; depending on what parts of RAPIDS you used, we also ask you to cite the work of other authors that shared their work. I have a lot of data, can RAPIDS handle it/ is RAPIDS fast enough? Yes, we use Snakemake under the hood, so you can automatically distribute RAPIDS execution over multiple cores or clusters . RAPIDS processes data per sensor and participant, so it can take advantage of this parallel processing. What are the advantages of using RAPIDS over implementing my own analysis code? We believe RAPIDS can benefit your analysis in several ways: RAPIDS has more than 250 behavioral features available, many of them tested and used by other researchers. RAPIDS can extract features in dynamic time segments (for example, every x minutes, x hours, x days, x weeks, x months, etc.). This is handy because you don\u2019t have to deal with time zones, daylight saving changes, or date arithmetic. Your analysis is less prone to errors. 
Every participant sensor dataset is analyzed in the same way and isolated from each other. If you have lots of data, out-of-the-box parallel execution will speed up your analysis and if your computer crashes, RAPIDS will start from where it left off. You can publish your analysis code along with your papers and be sure it will run exactly as it does in your computer. You can still add your own behavioral features and data streams if you need to, and the community will be able to reuse your work. Data Streams \u00b6 Can I process smartphone data collected with Beiwe, PurpleRobot, or app X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. Can I process data from Oura Rings, Actigraphs, or wearable X? The only wearables we support at the moment are Empatica and Fitbit. However, get in touch if you need to process data from a different wearable. We have limited resources so we add support for different devices on an as-needed basis, but we would be happy to collaborate with you to add new wearables. Email us , create a Github issue or text us in Slack if you have any questions. Can I process smartphone or wearable data stored in PostgreSQL, Oracle, SQLite, CSV files, or data container X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . If you are processing data streams we already support like AWARE, Fitbit, or Empatica and are just connecting to a different container; you can reuse their format.yaml and only implement a new container script. Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. 
I have participants that live in different time zones and some that travel; can RAPIDS handle this? Yes, RAPIDS can handle single or multiple timezones per participant. You can use time zone data collected by smartphones or collected by hand. Some of my participants used more than one device during my study; can RAPIDS handle this? Yes, you can link more than one smartphone or wearable device to a single participant. RAPIDS will merge them and sort them automatically. Some of my participants switched from Android to iOS or vice-versa during my study; can RAPIDS handle this? Yes, data from multiple smartphones can be linked to a single participant. All iOS data is converted to Android data before merging it. Extending RAPIDS \u00b6 Can I add my own behavioral features/digital biomarkers? Yes, you can implement your own features in R or Python following this tutorial Can I extract behavioral features based on two or more sensors? Yes, we do this for PHONE_DATA_YIELD (combines all phone sensors), PHONE_LOCATIONS (combines location and data yield data), PHONE_APPLICATIONS_BACKGROUND (combines screen and app usage data), and FITBIT_INTRADAY_STEPS (combines Fitbit and sleep and step data). However, we haven\u2019t come up with a user-friendly way to configure this, and currently, we join sensors on a case-by-case basis. This is mainly because not enough users have needed this functionality so far. Get in touch, and we can set it up together; the more use cases we are aware of, the easier it will be to integrate this into RAPIDS. I know how to program in Python or R but not both. Can I still use or extend RAPIDS? Yes, you don\u2019t need to write any code to use RAPIDS out of the box. If you need to add support for new data streams or behavioral features you can use scripts in either language. I have scripts that clean raw data from X sensor, can I use them with RAPIDS? Yes, you can add them as a [MUTATION][SCRIPT] in the format.yaml of the data stream you are using. 
You will add a main function that will receive a data frame with the raw data for that sensor that in turn will be used to compute behavioral features.","title":"Overview"},{"location":"setup/overview/#overview","text":"Let\u2019s review some key concepts we use throughout these docs: Definition Description Data Stream Set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, smartphone (device) data collected with the AWARE Framework (format) and stored in a MySQL database (container). Device A mobile or wearable device, like smartphones, Fitbit wrist bands, Oura Rings, etc. Sensor A physical or digital module built into a device that produces a data stream. For example, a smartphone\u2019s accelerometer or screen. Format A file in RAPIDS that describes how sensor data from a device matches RAPIDS data representation. Container An electronic repository of data, it can be a database, a file, a Web API, etc. RAPIDS connects to containers through container scripts. Participant A person that took part in a monitoring study Behavioral feature A metric computed from raw sensor data quantifying the behavior of a participant. For example, time spent at home computed from location data. These are also known as digital biomarkers Time segment Time segments (or epochs) are the time windows on which RAPIDS extracts behavioral features. For example, you might want to compute participants\u2019 time at home every morning or only during weekends. You define time segments in a CSV file that RAPIDS processes. Time zone A string code like America/New_York that represents a time zone where a device logged data. You can process data collected in single or multiple time zones. Provider A script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . 
config.yaml A YAML file where you can modify parameters to process data streams and behavioral features. This is the heart of RAPIDS and the file that you will modify the most. credentials.yaml A YAML file where you can define credential groups (user, password, host, etc.) if your data stream needs to connect to a database or Web API Participant file(s) A YAML file that links one or more smartphone or wearable devices that a single participant used. RAPIDS needs one file per participant. What can I do with RAPIDS? You can do one or more of these things with RAPIDS: Extract behavioral features from smartphone, Fitbit, and Empatica\u2019s supported data streams Add your own behavioral features (we can include them in RAPIDS if you want to share them with the community) Add support for new data streams if yours cannot be processed by RAPIDS yet Create visualizations for data quality control and feature inspection Extending RAPIDS to organize your analysis and publish a code repository along with your code Hint In order to follow any of the previous tutorials, you will have to Install , Configure , and learn how to Execute RAPIDS. We recommend you follow the Minimal Example tutorial to get familiar with RAPIDS Email us , leave a comment in these docs, create a Github issue or text us in Slack if you have any questions","title":"Overview"},{"location":"setup/overview/#frequently-asked-questions","text":"","title":"Frequently Asked Questions"},{"location":"setup/overview/#general","text":"What exactly is RAPIDS? RAPIDS is a group of configuration files and R and Python scripts that are executed by Snakemake . You can get a copy of RAPIDS by cloning our Github repository. RAPIDS is not a web application or server; all the processing is done in your laptop, server, or computer cluster. How does RAPIDS work? 
You will most of the time only have to modify configuration files in YAML format ( config.yaml , credentials.yaml , and participant files pxx.yaml ), and in CSV format (time zones and time segments). RAPIDS pulls data from different data containers and processes it in steps. The input/output of each step is saved as a CSV file for inspection; you can check the files that are created for each sensor on its documentation page. All data is stored in data/ , and all processing Python and R scripts are stored in src/ . User and File interactions in RAPIDS In the figure below, we represent the interactions between users and files. After a user modifies the configuration files mentioned above, the Snakefile file will search for and execute the Snakemake rules that contain the Python or R scripts necessary to generate or update the required output files (behavioral features, plots, etc.). Interaction diagram between the user, and important files in RAPIDS Data flow in RAPIDS In the figure below, we represent the flow of data in RAPIDS. In broad terms, smartphone and wearable devices log data streams with a certain format to a data container (database, file, etc.). RAPIDS can connect to these containers if it has a format.yaml and a container.[R|py] script used to pull the correct data and mutate it to comply with RAPIDS\u2019 internal data representation. Once the data stream is in RAPIDS, it goes through some basic transformations (scripts), one that assigns a time segment and a time zone to each data row, and another one that creates \u201cepisodes\u201d of data for some sensors that need it (like screen, battery, activity recognition, and sleep intraday data). After this, RAPIDS executes the requested PROVIDER script that computes behavioral features per time segment instance. After every feature is computed, they are joined per sensor, per participant, and study. Visualizations are built based on raw data or based on computed features. 
Data stream flow in RAPIDS Is my data private? Absolutely, you are processing your data with your own copy of RAPIDS in your laptop, server, or computer cluster, so neither we nor anyone else can have access to your datasets. Do I need to have coding skills to use RAPIDS? If you want to extract the behavioral features or visualizations that RAPIDS offers out of the box, the answer is no. However, you need to be comfortable running commands in your terminal and familiar with editing YAML files and CSV files. If you want to add support for new data streams or behavioral features, you need to be familiar with R or Python. Is RAPIDS open-source or free? Yes, RAPIDS is both open-source and free. How do I cite RAPIDS? Please refer to our Citation guide ; depending on what parts of RAPIDS you used, we also ask you to cite the work of other authors that shared their work. I have a lot of data, can RAPIDS handle it/ is RAPIDS fast enough? Yes, we use Snakemake under the hood, so you can automatically distribute RAPIDS execution over multiple cores or clusters . RAPIDS processes data per sensor and participant, so it can take advantage of this parallel processing. What are the advantages of using RAPIDS over implementing my own analysis code? We believe RAPIDS can benefit your analysis in several ways: RAPIDS has more than 250 behavioral features available, many of them tested and used by other researchers. RAPIDS can extract features in dynamic time segments (for example, every x minutes, x hours, x days, x weeks, x months, etc.). This is handy because you don\u2019t have to deal with time zones, daylight saving changes, or date arithmetic. Your analysis is less prone to errors. Every participant sensor dataset is analyzed in the same way and isolated from each other. If you have lots of data, out-of-the-box parallel execution will speed up your analysis and if your computer crashes, RAPIDS will start from where it left off. 
You can publish your analysis code along with your papers and be sure it will run exactly as it does in your computer. You can still add your own behavioral features and data streams if you need to, and the community will be able to reuse your work.","title":"General"},{"location":"setup/overview/#data-streams","text":"Can I process smartphone data collected with Beiwe, PurpleRobot, or app X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. Can I process data from Oura Rings, Actigraphs, or wearable X? The only wearables we support at the moment are Empatica and Fitbit. However, get in touch if you need to process data from a different wearable. We have limited resources so we add support for different devices on an as-needed basis, but we would be happy to collaborate with you to add new wearables. Email us , create a Github issue or text us in Slack if you have any questions. Can I process smartphone or wearable data stored in PostgreSQL, Oracle, SQLite, CSV files, or data container X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . If you are processing data streams we already support like AWARE, Fitbit, or Empatica and are just connecting to a different container; you can reuse their format.yaml and only implement a new container script. Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. I have participants that live in different time zones and some that travel; can RAPIDS handle this? Yes, RAPIDS can handle single or multiple timezones per participant. You can use time zone data collected by smartphones or collected by hand. 
Some of my participants used more than one device during my study; can RAPIDS handle this? Yes, you can link more than one smartphone or wearable device to a single participant. RAPIDS will merge them and sort them automatically. Some of my participants switched from Android to iOS or vice-versa during my study; can RAPIDS handle this? Yes, data from multiple smartphones can be linked to a single participant. All iOS data is converted to Android data before merging it.","title":"Data Streams"},{"location":"setup/overview/#extending-rapids","text":"Can I add my own behavioral features/digital biomarkers? Yes, you can implement your own features in R or Python following this tutorial Can I extract behavioral features based on two or more sensors? Yes, we do this for PHONE_DATA_YIELD (combines all phone sensors), PHONE_LOCATIONS (combines location and data yield data), PHONE_APPLICATIONS_BACKGROUND (combines screen and app usage data), and FITBIT_INTRADAY_STEPS (combines Fitbit and sleep and step data). However, we haven\u2019t come up with a user-friendly way to configure this, and currently, we join sensors on a case-by-case basis. This is mainly because not enough users have needed this functionality so far. Get in touch, and we can set it up together; the more use cases we are aware of, the easier it will be to integrate this into RAPIDS. I know how to program in Python or R but not both. Can I still use or extend RAPIDS? Yes, you don\u2019t need to write any code to use RAPIDS out of the box. If you need to add support for new data streams or behavioral features you can use scripts in either language. I have scripts that clean raw data from X sensor, can I use them with RAPIDS? Yes, you can add them as a [MUTATION][SCRIPT] in the format.yaml of the data stream you are using. 
You will add a main function that will receive a data frame with the raw data for that sensor that in turn will be used to compute behavioral features.","title":"Extending RAPIDS"},{"location":"snippets/aware_format/","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is 
cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. 
We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. 
PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Aware format"},{"location":"snippets/database/","text":"Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. 
If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine.","title":"Database"},{"location":"snippets/feature_introduction_example/","text":"Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. 
For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Feature introduction example"},{"location":"snippets/jsonfitbit_format/","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Jsonfitbit format"},{"location":"snippets/parsedfitbit_format/","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . 
This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just keep in mind that some features will be empty if some of these columns are empty. FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Parsedfitbit format"},{"location":"visualizations/data-quality-visualizations/","text":"Data Quality Visualizations \u00b6 We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (example01 and example02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). Note Time segments (e.g. daily , morning , etc.) can have multiple instances (day 1, day 2, or morning 1, morning 2, etc.) 1. Histograms of phone data yield \u00b6 RAPIDS provides two histograms that show the number of time segment instances that had a certain ratio of valid yielded minutes and hours , respectively. A valid yielded minute has at least 1 row of data from any smartphone sensor and a valid yielded hour contains at least M valid minutes. These plots can be used as a rough indication of the smartphone monitoring coverage during a study aggregated across all participants. For example, the figure below shows a valid yielded minutes histogram for daily segments and we can infer that the monitoring coverage was very good since almost all segments contain at least 90 to 100% of the expected sensed minutes. Example Click here to see an example of these interactive visualizations in HTML format Histogram of the data yielded minute ratio for a single participant during five time segments (daily, morning, afternoon, evening, and night) 2. Heatmaps of overall data yield \u00b6 These heatmaps are a break down per time segment and per participant of Visualization 1 . 
Heatmap\u2019s rows represent participants, columns represent time segment instances and the cells\u2019 color represents the valid yielded minute or hour ratio for a participant during a time segment instance. As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis can be labelled with the absolute time of the start of each time segment instance or the time delta between the start of each time segment instance and the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. The figure below shows the heatmap of the valid yielded minute ratio for participants example01 and example02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicates that both phones sensed data without interruptions for most days (except for the first and last ones). [ABSOLUTE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants [RELATIVE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants 3. Heatmap of recorded phone sensors \u00b6 In these heatmaps rows represent time segment instances, columns represent minutes since the start of a time segment instance, and cells\u2019 color shows the number of phone sensors that logged at least one row of data during those 1-minute windows. RAPIDS creates a plot per participant and per time segment, which can be used as a rough indication of whether time-based sensors were following their sensing schedule (e.g. if location was being sensed every 2 minutes). The figure below shows this heatmap for phone sensors collected by participant example01 in daily time segments from Apr 23 rd 2020 to May 4 th 2020. 
We can infer that for most of the monitoring time, the participant\u2019s phone logged data from at least 7 sensors each minute. Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the recorded phone sensors per minute and per time segment of a single participant 4. Heatmap of sensor row count \u00b6 These heatmaps are a per-sensor breakdown of Visualization 1 and Visualization 2 . Note that the second row (ratio of valid yielded minutes) of this heatmap matches the respective participant (bottom) row in the screenshot in Visualization 2. In these heatmaps rows represent phone or Fitbit sensors, columns represent time segment instances and cells\u2019 color shows the normalized (0 to 1) row count of each sensor within a time segment instance. RAPIDS creates one heatmap per participant and they can be used to judge missing data on a per participant and per sensor basis. The figure below shows data for 14 phone sensors (including data yield) of example01\u2019s daily segments. From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23 rd ), peaks of location coordinates on Apr 26 th and Apr 30 th , and no sent or received SMS except for Apr 23 rd , Apr 29 th and Apr 30 th (unlabeled row between screen and locations). 
Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the sensor row count per time segment of a single participant","title":"Data Quality"},{"location":"visualizations/data-quality-visualizations/#data-quality-visualizations","text":"We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (example01 and example02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). Note Time segments (e.g. daily , morning , etc.) can have multiple instances (day 1, day 2, or morning 1, morning 2, etc.)","title":"Data Quality Visualizations"},{"location":"visualizations/data-quality-visualizations/#1-histograms-of-phone-data-yield","text":"RAPIDS provides two histograms that show the number of time segment instances that had a certain ratio of valid yielded minutes and hours , respectively. A valid yielded minute has at least 1 row of data from any smartphone sensor and a valid yielded hour contains at least M valid minutes. These plots can be used as a rough indication of the smartphone monitoring coverage during a study aggregated across all participants. For example, the figure below shows a valid yielded minutes histogram for daily segments and we can infer that the monitoring coverage was very good since almost all segments contain at least 90 to 100% of the expected sensed minutes. Example Click here to see an example of these interactive visualizations in HTML format Histogram of the data yielded minute ratio for a single participant during five time segments (daily, morning, afternoon, evening, and night)","title":"1. Histograms of phone data yield"},{"location":"visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield","text":"These heatmaps are a break down per time segment and per participant of Visualization 1 . 
Heatmap\u2019s rows represent participants, columns represent time segment instances and the cells\u2019 color represents the valid yielded minute or hour ratio for a participant during a time segment instance. As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis can be labelled with the absolute time of the start of each time segment instance or the time delta between the start of each time segment instance and the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. The figure below shows the heatmap of the valid yielded minute ratio for participants example01 and example02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicates that both phones sensed data without interruptions for most days (except for the first and last ones). [ABSOLUTE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants [RELATIVE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants","title":"2. Heatmaps of overall data yield"},{"location":"visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors","text":"In these heatmaps rows represent time segment instances, columns represent minutes since the start of a time segment instance, and cells\u2019 color shows the number of phone sensors that logged at least one row of data during those 1-minute windows. RAPIDS creates a plot per participant and per time segment, which can be used as a rough indication of whether time-based sensors were following their sensing schedule (e.g. if location was being sensed every 2 minutes). 
The figure below shows this heatmap for phone sensors collected by participant example01 in daily time segments from Apr 23 rd 2020 to May 4 th 2020. We can infer that for most of the monitoring time, the participant\u2019s phone logged data from at least 7 sensors each minute. Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the recorded phone sensors per minute and per time segment of a single participant","title":"3. Heatmap of recorded phone sensors"},{"location":"visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count","text":"These heatmaps are a per-sensor breakdown of Visualization 1 and Visualization 2 . Note that the second row (ratio of valid yielded minutes) of this heatmap matches the respective participant (bottom) row in the screenshot in Visualization 2. In these heatmaps rows represent phone or Fitbit sensors, columns represent time segment instances and cells\u2019 color shows the normalized (0 to 1) row count of each sensor within a time segment instance. RAPIDS creates one heatmap per participant and they can be used to judge missing data on a per participant and per sensor basis. The figure below shows data for 14 phone sensors (including data yield) of example01\u2019s daily segments. From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23 rd ), peaks of location coordinates on Apr 26 th and Apr 30 th , and no sent or received SMS except for Apr 23 rd , Apr 29 th and Apr 30 th (unlabeled row between screen and locations). Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the sensor row count per time segment of a single participant","title":"4. 
Heatmap of sensor row count"},{"location":"visualizations/feature-visualizations/","text":"Feature Visualizations \u00b6 1. Heatmap Correlation Matrix \u00b6 Columns and rows are the behavioral features computed in RAPIDS, cells\u2019 color represents the correlation coefficient between all days of data for every pair of features of all participants. The user can specify a minimum number of observations ( time segment instances) required to compute the correlation between two features using the MIN_ROWS_RATIO parameter (0.5 by default) and the correlation method (Pearson, Spearman or Kendall) with the CORR_METHOD parameter. In addition, this plot can be configured to only display correlation coefficients above a threshold using the CORR_THRESHOLD parameter (0.1 by default). Example Click here to see an example of these interactive visualizations in HTML format Correlation matrix heatmap for all the features of all participants","title":"Features"},{"location":"visualizations/feature-visualizations/#feature-visualizations","text":"","title":"Feature Visualizations"},{"location":"visualizations/feature-visualizations/#1-heatmap-correlation-matrix","text":"Columns and rows are the behavioral features computed in RAPIDS, cells\u2019 color represents the correlation coefficient between all days of data for every pair of features of all participants. The user can specify a minimum number of observations ( time segment instances) required to compute the correlation between two features using the MIN_ROWS_RATIO parameter (0.5 by default) and the correlation method (Pearson, Spearman or Kendall) with the CORR_METHOD parameter. In addition, this plot can be configured to only display correlation coefficients above a threshold using the CORR_THRESHOLD parameter (0.1 by default). Example Click here to see an example of these interactive visualizations in HTML format Correlation matrix heatmap for all the features of all participants","title":"1. 
Heatmap Correlation Matrix"},{"location":"workflow-examples/analysis/","text":"Analysis Workflow Example \u00b6 TL;DR In addition to using RAPIDS to extract behavioral features and create plots, you can structure your data analysis within RAPIDS (i.e. cleaning your features and creating ML/statistical models) We include an analysis example in RAPIDS that covers raw data processing, cleaning, feature extraction, machine learning modeling, and evaluation Use this example as a guide to structure your own analysis within RAPIDS RAPIDS analysis workflows are compatible with your favorite data science tools and libraries RAPIDS analysis workflows are reproducible and we encourage you to publish them along with your research papers Why should I integrate my analysis in RAPIDS? \u00b6 Even though the bulk of RAPIDS current functionality is related to the computation of behavioral features, we recommend RAPIDS as a complementary tool to create a mobile data analysis workflow. This is because the cookiecutter data science file organization guidelines, the use of Snakemake, the provided behavioral features, and the reproducible R and Python development environments allow researchers to divide an analysis workflow into small parts that can be audited, shared in an online repository, reproduced in other computers, and understood by other people as they follow a familiar and consistent structure. We believe these advantages outweigh the time needed to learn how to create these workflows in RAPIDS. We clarify that to create analysis workflows in RAPIDS, researchers can still use any data manipulation tools, editors, libraries or languages they are already familiar with. RAPIDS is meant to be the final destination of analysis code that was developed in interactive notebooks or stand-alone scripts. 
For example, a user can compute call and location features using RAPIDS, then, they can use Jupyter notebooks to explore feature cleaning approaches and once the cleaning code is final, it can be moved to RAPIDS as a new step in the pipeline. In turn, the output of this cleaning step can be used to explore machine learning models and once a model is finished, it can also be transferred to RAPIDS as a step of its own. The idea is that when it is time to publish a piece of research, a RAPIDS workflow can be shared in a public repository as is. In the following sections we share an example of how we structured an analysis workflow in RAPIDS. Analysis workflow structure \u00b6 To accurately reflect the complexity of a real-world modeling scenario, we decided not to oversimplify this example. Importantly, every step in this example follows a basic structure: an input file and parameters are manipulated by an R or Python script that saves the results to an output file. Input files, parameters, output files and scripts are grouped into Snakemake rules that are described on smk files in the rules folder (we point the reader to the relevant rule(s) of each step). Researchers can use these rules and scripts as a guide to create their own as it is expected every modeling project will have different requirements, data and goals but ultimately most follow a similar chained pattern. Hint The example\u2019s config file is example_profile/example_config.yaml and its Snakefile is in example_profile/Snakefile . The config file is already configured to process the sensor data as explained in Analysis workflow modules . Description of the study modeled in our analysis workflow example \u00b6 Our example is based on a hypothetical study that recruited 2 participants that underwent surgery and collected mobile data for at least one week before and one week after the procedure. 
Participants wore a Fitbit device and installed the AWARE client in their personal Android and iOS smartphones to collect mobile data 24/7. In addition, participants completed daily severity ratings of 12 common symptoms on a scale from 0 to 10 that we summed up into a daily symptom burden score. The goal of this workflow is to find out if we can predict the daily symptom burden score of a participant. Thus, we framed this question as a binary classification problem with two classes, high and low symptom burden based on the scores above and below average of each participant. We also want to compare the performance of individual (personalized) models vs a population model. In total, our example workflow has nine steps that are in charge of sensor data preprocessing, feature extraction, feature cleaning, machine learning model training and model evaluation (see figure below). We ship this workflow with RAPIDS and share files with test data in an Open Science Framework repository. Modules of RAPIDS example workflow, from raw data to model evaluation Configure and run the analysis workflow example \u00b6 Install RAPIDS Unzip the CSV files inside rapids_example_csv.zip in data/external/example_workflow/*.csv . Create the participant files for this example by running: ./rapids -j1 create_example_participant_files Run the example pipeline with: ./rapids -j1 --profile example_profile Note you will see a lot of warning messages, you can ignore them since they happen because we ran ML algorithms with a small fake dataset. Modules of our analysis workflow example \u00b6 1. 
Feature extraction We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 237 daily sensor features over 12 days per participant. 2. Extract demographic data. It is common to have demographic data in addition to mobile and target (ground truth) data. In this example we include participants\u2019 age, gender and the number of days they spent in hospital after their surgery as features in our model. We extract these three columns from the data/external/example_workflow/participant_info.csv file. As these three features remain the same within participants, they are used only on the population model. Refer to the demographic_features rule in rules/models.smk . 3. Create target labels. The two classes for our machine learning binary classification problem are high and low symptom burden. Target values are already stored in the data/external/example_workflow/participant_target.csv file. A new rule/script can be created if further manipulation is necessary. Refer to the parse_targets rule in rules/models.smk . 4. Feature merging. These daily features are stored on a CSV file per sensor, a CSV file per participant, and a CSV file including all features from all participants (in every case each column represents a feature and each row represents a day). Refer to the merge_sensor_features_for_individual_participants and merge_sensor_features_for_all_participants rules in rules/features.smk . 5. Data visualization. 
At this point the user can use the five plots RAPIDS provides (or implement new ones) to explore and understand the quality of the raw data and extracted features and decide what sensors, days, or participants to include and exclude. Refer to rules/reports.smk to find the rules that generate these plots. 6. Feature cleaning. In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in example_profile/example_config.yaml . After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 107 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stop researchers from collecting the same number of sensors as in Android phones. Feature cleaning for the individual models is done in the clean_sensor_features_for_individual_participants rule and for the population model in the clean_sensor_features_for_all_participants rule in rules/models.smk . 7. Merge features and targets. In this step we merge the cleaned features and target labels for our individual models in the merge_features_and_targets_for_individual_model rule in rules/models.smk . Additionally, we merge the cleaned features, target labels, and demographic features of our two participants for the population model in the merge_features_and_targets_for_population_model rule in rules/models.smk . These two merged files are the input for our individual and population models. 8. Modelling. 
This stage has three phases: model building, training and evaluation. In the building phase we impute, normalize and oversample our dataset. Missing numeric values in each column are imputed with their mean and we impute missing categorical values with their mode. We normalize each numeric column with one of three strategies (min-max, z-score, and scikit-learn package\u2019s robust scaler) and we one-hot encode each categorical feature as a numerical array. We oversample our imbalanced dataset using SMOTE (Synthetic Minority Over-sampling Technique) or a Random Over sampler from scikit-learn. All these parameters are exposed in example_profile/example_config.yaml . In the training phase, we create eight models: logistic regression, k-nearest neighbors, support vector machine, decision tree, random forest, gradient boosting classifier, extreme gradient boosting classifier and a light gradient boosting machine. We cross-validate each model with an inner cycle to tune hyper-parameters based on the Macro F1 score and an outer cycle to predict the test set on a model with the best hyper-parameters. Both cross-validation cycles use a leave-one-out strategy. Parameters for each model like weights and learning rates are exposed in example_profile/example_config.yaml . Finally, in the evaluation phase we compute the accuracy, Macro F1, kappa, area under the curve and per class precision, recall and F1 score of all folds of the outer cross-validation cycle. Refer to the modelling_for_individual_participants rule for the individual modeling and to the modelling_for_all_participants rule for the population modeling, both in rules/models.smk . 9. Compute model baselines. We create three baselines to evaluate our classification models. First, a majority classifier that labels each test sample with the majority class of our training data. 
Second, a random weighted classifier that predicts each test observation sampling at random from a binomial distribution based on the ratio of our target labels. Third, a decision tree classifier based solely on the demographic features of each participant. As we do not have demographic features for individual model, this baseline is only available for population model. Our baseline metrics (e.g. accuracy, precision, etc.) are saved into a CSV file, ready to be compared to our modeling results. Refer to the baselines_for_individual_model rule for the individual model baselines and to the baselines_for_population_model rule for population model baselines, both in rules/models.smk .","title":"Complete Example"},{"location":"workflow-examples/analysis/#analysis-workflow-example","text":"TL;DR In addition to using RAPIDS to extract behavioral features and create plots, you can structure your data analysis within RAPIDS (i.e. cleaning your features and creating ML/statistical models) We include an analysis example in RAPIDS that covers raw data processing, cleaning, feature extraction, machine learning modeling, and evaluation Use this example as a guide to structure your own analysis within RAPIDS RAPIDS analysis workflows are compatible with your favorite data science tools and libraries RAPIDS analysis workflows are reproducible and we encourage you to publish them along with your research papers","title":"Analysis Workflow Example"},{"location":"workflow-examples/analysis/#why-should-i-integrate-my-analysis-in-rapids","text":"Even though the bulk of RAPIDS current functionality is related to the computation of behavioral features, we recommend RAPIDS as a complementary tool to create a mobile data analysis workflow. 
This is because the cookiecutter data science file organization guidelines, the use of Snakemake, the provided behavioral features, and the reproducible R and Python development environments allow researchers to divide an analysis workflow into small parts that can be audited, shared in an online repository, reproduced in other computers, and understood by other people as they follow a familiar and consistent structure. We believe these advantages outweigh the time needed to learn how to create these workflows in RAPIDS. We clarify that to create analysis workflows in RAPIDS, researchers can still use any data manipulation tools, editors, libraries or languages they are already familiar with. RAPIDS is meant to be the final destination of analysis code that was developed in interactive notebooks or stand-alone scripts. For example, a user can compute call and location features using RAPIDS, then, they can use Jupyter notebooks to explore feature cleaning approaches and once the cleaning code is final, it can be moved to RAPIDS as a new step in the pipeline. In turn, the output of this cleaning step can be used to explore machine learning models and once a model is finished, it can also be transferred to RAPIDS as a step of its own. The idea is that when it is time to publish a piece of research, a RAPIDS workflow can be shared in a public repository as is. In the following sections we share an example of how we structured an analysis workflow in RAPIDS.","title":"Why should I integrate my analysis in RAPIDS?"},{"location":"workflow-examples/analysis/#analysis-workflow-structure","text":"To accurately reflect the complexity of a real-world modeling scenario, we decided not to oversimplify this example. Importantly, every step in this example follows a basic structure: an input file and parameters are manipulated by an R or Python script that saves the results to an output file. 
Input files, parameters, output files and scripts are grouped into Snakemake rules that are described on smk files in the rules folder (we point the reader to the relevant rule(s) of each step). Researchers can use these rules and scripts as a guide to create their own as it is expected every modeling project will have different requirements, data and goals but ultimately most follow a similar chained pattern. Hint The example\u2019s config file is example_profile/example_config.yaml and its Snakefile is in example_profile/Snakefile . The config file is already configured to process the sensor data as explained in Analysis workflow modules .","title":"Analysis workflow structure"},{"location":"workflow-examples/analysis/#description-of-the-study-modeled-in-our-analysis-workflow-example","text":"Our example is based on a hypothetical study that recruited 2 participants that underwent surgery and collected mobile data for at least one week before and one week after the procedure. Participants wore a Fitbit device and installed the AWARE client in their personal Android and iOS smartphones to collect mobile data 24/7. In addition, participants completed daily severity ratings of 12 common symptoms on a scale from 0 to 10 that we summed up into a daily symptom burden score. The goal of this workflow is to find out if we can predict the daily symptom burden score of a participant. Thus, we framed this question as a binary classification problem with two classes, high and low symptom burden based on the scores above and below average of each participant. We also want to compare the performance of individual (personalized) models vs a population model. In total, our example workflow has nine steps that are in charge of sensor data preprocessing, feature extraction, feature cleaning, machine learning model training and model evaluation (see figure below). We ship this workflow with RAPIDS and share files with test data in an Open Science Framework repository. 
Modules of RAPIDS example workflow, from raw data to model evaluation","title":"Description of the study modeled in our analysis workflow example"},{"location":"workflow-examples/analysis/#configure-and-run-the-analysis-workflow-example","text":"Install RAPIDS Unzip the CSV files inside rapids_example_csv.zip in data/external/example_workflow/*.csv . Create the participant files for this example by running: ./rapids -j1 create_example_participant_files Run the example pipeline with: ./rapids -j1 --profile example_profile Note you will see a lot of warning messages, you can ignore them since they happen because we ran ML algorithms with a small fake dataset.","title":"Configure and run the analysis workflow example"},{"location":"workflow-examples/analysis/#modules-of-our-analysis-workflow-example","text":"1. Feature extraction We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 237 daily sensor features over 12 days per participant. 2. Extract demographic data. It is common to have demographic data in addition to mobile and target (ground truth) data. In this example we include participants\u2019 age, gender and the number of days they spent in hospital after their surgery as features in our model. We extract these three columns from the data/external/example_workflow/participant_info.csv file. As these three features remain the same within participants, they are used only on the population model. Refer to the demographic_features rule in rules/models.smk . 3. Create target labels. 
The two classes for our machine learning binary classification problem are high and low symptom burden. Target values are already stored in the data/external/example_workflow/participant_target.csv file. A new rule/script can be created if further manipulation is necessary. Refer to the parse_targets rule in rules/models.smk . 4. Feature merging. These daily features are stored on a CSV file per sensor, a CSV file per participant, and a CSV file including all features from all participants (in every case each column represents a feature and each row represents a day). Refer to the merge_sensor_features_for_individual_participants and merge_sensor_features_for_all_participants rules in rules/features.smk . 5. Data visualization. At this point the user can use the five plots RAPIDS provides (or implement new ones) to explore and understand the quality of the raw data and extracted features and decide what sensors, days, or participants to include and exclude. Refer to rules/reports.smk to find the rules that generate these plots. 6. Feature cleaning. In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in example_profile/example_config.yaml . After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 107 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stop researchers from collecting the same number of sensors as in Android phones. 
Feature cleaning for the individual models is done in the clean_sensor_features_for_individual_participants rule and for the population model in the clean_sensor_features_for_all_participants rule in rules/models.smk . 7. Merge features and targets. In this step we merge the cleaned features and target labels for our individual models in the merge_features_and_targets_for_individual_model rule in rules/models.smk . Additionally, we merge the cleaned features, target labels, and demographic features of our two participants for the population model in the merge_features_and_targets_for_population_model rule in rules/models.smk . These two merged files are the input for our individual and population models. 8. Modelling. This stage has three phases: model building, training and evaluation. In the building phase we impute, normalize and oversample our dataset. Missing numeric values in each column are imputed with their mean and we impute missing categorical values with their mode. We normalize each numeric column with one of three strategies (min-max, z-score, and scikit-learn package\u2019s robust scaler) and we one-hot encode each categorical feature as a numerical array. We oversample our imbalanced dataset using SMOTE (Synthetic Minority Over-sampling Technique) or a Random Over sampler from scikit-learn. All these parameters are exposed in example_profile/example_config.yaml . In the training phase, we create eight models: logistic regression, k-nearest neighbors, support vector machine, decision tree, random forest, gradient boosting classifier, extreme gradient boosting classifier and a light gradient boosting machine. We cross-validate each model with an inner cycle to tune hyper-parameters based on the Macro F1 score and an outer cycle to predict the test set on a model with the best hyper-parameters. Both cross-validation cycles use a leave-one-out strategy. 
Parameters for each model like weights and learning rates are exposed in example_profile/example_config.yaml . Finally, in the evaluation phase we compute the accuracy, Macro F1, kappa, area under the curve and per class precision, recall and F1 score of all folds of the outer cross-validation cycle. Refer to the modelling_for_individual_participants rule for the individual modeling and to the modelling_for_all_participants rule for the population modeling, both in rules/models.smk . 9. Compute model baselines. We create three baselines to evaluate our classification models. First, a majority classifier that labels each test sample with the majority class of our training data. Second, a random weighted classifier that predicts each test observation sampling at random from a binomial distribution based on the ratio of our target labels. Third, a decision tree classifier based solely on the demographic features of each participant. As we do not have demographic features for individual model, this baseline is only available for population model. Our baseline metrics (e.g. accuracy, precision, etc.) are saved into a CSV file, ready to be compared to our modeling results. Refer to the baselines_for_individual_model rule for the individual model baselines and to the baselines_for_population_model rule for population model baselines, both in rules/models.smk .","title":"Modules of our analysis workflow example"},{"location":"workflow-examples/minimal/","text":"Minimal Working Example \u00b6 This is a quick guide for creating and running a simple pipeline to extract missed, outgoing, and incoming call features for 24 hr ( 00:00:00 to 23:59:59 ) and night ( 00:00:00 to 05:59:59 ) time segments of every day of data of one participant that was monitored on the US East coast with an Android smartphone. 
Install RAPIDS and make sure your conda environment is active (see Installation ) Download this CSV file and save it as data/external/aware_csv/calls.csv Make the changes listed below for the corresponding Configuration step (we provide an example of what the relevant sections in your config.yml will look like after you are done) Required configuration changes ( click to expand ) Supported data streams . Based on the docs, we decided to use the aware_csv data stream because we are processing aware data saved in a CSV file. We will use this label in a later step; there\u2019s no need to type it or save it anywhere yet. Create your participants file . Since we are processing data from a single participant, you only need to create a single participant file called p01.yaml in data/external/participant_files . This participant file only has a PHONE section because this hypothetical participant was only monitored with a smartphone. Note that for a real analysis, you can do this automatically with a CSV file Add p01 to [PIDS] in config.yaml Create a file in data/external/participant_files/p01.yaml with the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty Select what time segments you want to extract features on. Set [TIME_SEGMENTS][FILE] to data/external/timesegments_periodic.csv Create a file in data/external/timesegments_periodic.csv with the following content label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 Choose the timezone of your study . 
We will use the default time zone settings since this example is processing data collected on the US East Coast ( America/New_York ) TIMEZONE : TYPE : SINGLE SINGLE : TZCODE : America/New_York Modify your device data stream configuration Set [PHONE_DATA_STREAMS][USE] to aware_csv . We will use the default value for [PHONE_DATA_STREAMS][aware_csv][FOLDER] since we already stored the test calls CSV file there. Select what sensors and features you want to process. Set [PHONE_CALLS][CONTAINER] to calls.csv in the config.yaml file. Set [PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE] to True in the config.yaml file. Example of the config.yaml sections after the changes outlined above This will be your config.yaml after following the instructions above. Click on the numbered markers to know more. PIDS : [ p01 ] # (1) TIMEZONE : TYPE : SINGLE # (2) SINGLE : TZCODE : America/New_York # ... other irrelevant sections TIME_SEGMENTS : &time_segments TYPE : PERIODIC # (3) FILE : \"data/external/timesegments_periodic.csv\" # (4) INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE PHONE_DATA_STREAMS : USE : aware_csv # (5) aware_csv : FOLDER : data/external/aware_csv # (6) # ... other irrelevant sections ############## PHONE ########################################################### ################################################################################ # ... other irrelevant sections # Communication call features config, TYPES and FEATURES keys need to match PHONE_CALLS : CONTAINER : calls.csv # (7) PROVIDERS : RAPIDS : COMPUTE : True # (8) CALL_TYPES : ... We added p01 to PIDS after creating the participant file: data/external/participant_files/p01.yaml With the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty We use the default SINGLE time zone. 
We use the default PERIODIC time segment [TYPE] We created this time segments file with these lines: label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 We set [USE] to aware_csv to tell RAPIDS to process sensor data collected with the AWARE Framework stored in CSV files. We used the default [FOLDER] for aware_csv since we already stored our test calls.csv file there We changed [CONTAINER] to calls.csv to process our test call data. We flipped [COMPUTE] to True to extract call behavioral features using the RAPIDS feature provider. Run RAPIDS ./rapids -j1 The call features for daily and night time segments will be in data/processed/features/all_participants/all_sensor_features.csv","title":"Minimal Example"},{"location":"workflow-examples/minimal/#minimal-working-example","text":"This is a quick guide for creating and running a simple pipeline to extract missed, outgoing, and incoming call features for 24 hr ( 00:00:00 to 23:59:59 ) and night ( 00:00:00 to 05:59:59 ) time segments of every day of data of one participant that was monitored on the US East coast with an Android smartphone. Install RAPIDS and make sure your conda environment is active (see Installation ) Download this CSV file and save it as data/external/aware_csv/calls.csv Make the changes listed below for the corresponding Configuration step (we provide an example of what the relevant sections in your config.yml will look like after you are done) Required configuration changes ( click to expand ) Supported data streams . Based on the docs, we decided to use the aware_csv data stream because we are processing aware data saved in a CSV file. We will use this label in a later step; there\u2019s no need to type it or save it anywhere yet. Create your participants file . Since we are processing data from a single participant, you only need to create a single participant file called p01.yaml in data/external/participant_files . 
This participant file only has a PHONE section because this hypothetical participant was only monitored with a smartphone. Note that for a real analysis, you can do this automatically with a CSV file Add p01 to [PIDS] in config.yaml Create a file in data/external/participant_files/p01.yaml with the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty Select what time segments you want to extract features on. Set [TIME_SEGMENTS][FILE] to data/external/timesegments_periodic.csv Create a file in data/external/timesegments_periodic.csv with the following content label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 Choose the timezone of your study . We will use the default time zone settings since this example is processing data collected on the US East Coast ( America/New_York ) TIMEZONE : TYPE : SINGLE SINGLE : TZCODE : America/New_York Modify your device data stream configuration Set [PHONE_DATA_STREAMS][USE] to aware_csv . We will use the default value for [PHONE_DATA_STREAMS][aware_csv][FOLDER] since we already stored the test calls CSV file there. Select what sensors and features you want to process. Set [PHONE_CALLS][CONTAINER] to calls.csv in the config.yaml file. Set [PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE] to True in the config.yaml file. Example of the config.yaml sections after the changes outlined above This will be your config.yaml after following the instructions above. Click on the numbered markers to know more. PIDS : [ p01 ] # (1) TIMEZONE : TYPE : SINGLE # (2) SINGLE : TZCODE : America/New_York # ... 
other irrelevant sections TIME_SEGMENTS : &time_segments TYPE : PERIODIC # (3) FILE : \"data/external/timesegments_periodic.csv\" # (4) INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE PHONE_DATA_STREAMS : USE : aware_csv # (5) aware_csv : FOLDER : data/external/aware_csv # (6) # ... other irrelevant sections ############## PHONE ########################################################### ################################################################################ # ... other irrelevant sections # Communication call features config, TYPES and FEATURES keys need to match PHONE_CALLS : CONTAINER : calls.csv # (7) PROVIDERS : RAPIDS : COMPUTE : True # (8) CALL_TYPES : ... We added p01 to PIDS after creating the participant file: data/external/participant_files/p01.yaml With the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty We use the default SINGLE time zone. We use the default PERIODIC time segment [TYPE] We created this time segments file with these lines: label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,001:00:00,5H 59M 59S,every_day,0 We set [USE] to aware_device to tell RAPIDS to process sensor data collected with the AWARE Framework stored in CSV files. We used the default [FOLDER] for awre_csv since we already stored our test calls.csv file there We changed [CONTAINER] to calls.csv to process our test call data. We flipped [COMPUTE] to True to extract call behavioral features using the RAPIDS feature provider. Run RAPIDS ./rapids -j1 The call features for daily and morning time segments will be in data/processed/features/all_participants/all_sensor_features.csv","title":"Minimal Working Example"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Welcome to RAPIDS documentation \u00b6 Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to extract and create behavioral features (a.k.a. digital biomarkers), visualize mobile sensor data, and structure your analysis into reproducible workflows. RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support data streams logged by smartphones, Fitbit wearables, and, in collaboration with the DBDP , Empatica wearables (but you can add your own too). If you want to know more head over to Overview Tip Questions or feedback can be posted on the #rapids channel in AWARE Framework's slack . Bugs and feature requests should be posted on Github . Join our discussions on our algorithms and assumptions for feature processing . Are you upgrading from RAPIDS 0.4.x or older? Follow this guide Ready? Go to Overview . What are the benefits of using RAPIDS? \u00b6 Consistent analysis . Every participant sensor dataset is analyzed in the same way and isolated from each other. Efficient analysis . Every analysis step is executed only once. Whenever your data or configuration changes, only the affected files are updated. Parallel execution . Thanks to Snakemake, your analysis can be executed over multiple cores without changing your code. Code-free features . Extract any of the behavioral features offered by RAPIDS without writing any code. Extensible code . You can easily add your own data streams or behavioral features in R or Python, share them with the community, and keep authorship and citations. Timezone aware . Your data is adjusted to one or more time zones per participant. Flexible time segments . 
You can extract behavioral features on time windows of any length (e.g., 5 minutes, 3 hours, 2 days), on every day or particular days (e.g., weekends, Mondays, the 1 st of each month, etc.), or around events of interest (e.g., surveys or clinical relapses). Tested code . We are continually adding tests to make sure our behavioral features are correct. Reproducible code . If you structure your analysis within RAPIDS, you can be sure your code will run in other computers as intended, thanks to R and Python virtual environments. You can share your analysis code along with your publications without any overhead. Private . All your data is processed locally.","title":"Home"},{"location":"#welcome-to-rapids-documentation","text":"Reproducible Analysis Pipeline for Data Streams (RAPIDS) allows you to process smartphone and wearable data to extract and create behavioral features (a.k.a. digital biomarkers), visualize mobile sensor data, and structure your analysis into reproducible workflows. RAPIDS is open source, documented, modular, tested, and reproducible. At the moment, we support data streams logged by smartphones, Fitbit wearables, and, in collaboration with the DBDP , Empatica wearables (but you can add your own too). If you want to know more head over to Overview Tip Questions or feedback can be posted on the #rapids channel in AWARE Framework's slack . Bugs and feature requests should be posted on Github . Join our discussions on our algorithms and assumptions for feature processing . Are you upgrading from RAPIDS 0.4.x or older? Follow this guide Ready? Go to Overview .","title":"Welcome to RAPIDS documentation"},{"location":"#what-are-the-benefits-of-using-rapids","text":"Consistent analysis . Every participant sensor dataset is analyzed in the same way and isolated from each other. Efficient analysis . Every analysis step is executed only once. Whenever your data or configuration changes, only the affected files are updated. Parallel execution . 
Thanks to Snakemake, your analysis can be executed over multiple cores without changing your code. Code-free features . Extract any of the behavioral features offered by RAPIDS without writing any code. Extensible code . You can easily add your own data streams or behavioral features in R or Python, share them with the community, and keep authorship and citations. Timezone aware . Your data is adjusted to one or more time zones per participant. Flexible time segments . You can extract behavioral features on time windows of any length (e.g., 5 minutes, 3 hours, 2 days), on every day or particular days (e.g., weekends, Mondays, the 1 st of each month, etc.), or around events of interest (e.g., surveys or clinical relapses). Tested code . We are continually adding tests to make sure our behavioral features are correct. Reproducible code . If you structure your analysis within RAPIDS, you can be sure your code will run in other computers as intended, thanks to R and Python virtual environments. You can share your analysis code along with your publications without any overhead. Private . All your data is processed locally.","title":"What are the benefits of using RAPIDS?"},{"location":"change-log/","text":"Change Log \u00b6 v1.0.1 \u00b6 Fix crash in chunk_episodes of utils.py for multi time zone data Fix crash in BT Doryab provider when the number of clusters is 2 Fix Fitbit multi time zone inference from phone data (simplify) Fix missing columns when the input for phone data yield is empty Fix wrong date time labels for event segments for multi time zone data (all labels are computed based on a single tz) Fix periodic segment crash when there are no segments to assign (only affects wday, mday, qday, or yday) Fix crash in Analysis Workflow with new suffix in segments\u2019 labels v1.0.0 \u00b6 Add a new Overview page. You can extend RAPIDS with your own data streams . 
Data streams are data collected with other sensing apps besides AWARE (like Beiwe, mindLAMP), and stored in other data containers (databases, files) besides MySQL. Support to analyze Empatica wearable data (thanks to Joe Kim and Brinnae Bent from the DBDP ) Support to analyze AWARE data stored in CSV files and InfluxDB databases Support to analyze data collected over multiple time zones Support for sleep intraday features from the core team and also from the community (thanks to Stephen Price) Users can comment on the documentation (powered by utterances). SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT . Add RAPIDS new logo Move Citation and Minimal Example page to the Setup section Add config.yaml validation schema and documentation. Now it\u2019s more difficult to modify the config.yaml file with invalid values. Add new time at home Doryab location feature Add and home coordinates to the location data file so location providers can build features based on it. If you are migrating from RAPIDS 0.4.3 or older, check this guide v0.4.3 \u00b6 Fix bug when any of the rows from any sensor do not belong a time segment v0.4.2 \u00b6 Update battery testing Fix location processing bug when certain columns don\u2019t exist Fix HR intraday bug when minutesonZONE features were 0 Update FAQs Fix HR summary bug when restinghr=0 (ignore those rows) Fix ROG, location entropy and normalized entropy in Doryab location provider Remove sampling frequency dependance in Doryab location provider Update documentation of Doryab location provider Add new FITBIT_DATA_YIELD RAPIDS provider Deprecate Doryab circadian movement feature until it is fixed v0.4.1 \u00b6 Fix bug when no error message was displayed for an empty [PHONE_DATA_YIELD][SENSORS] when resampling location data v0.4.0 \u00b6 Add four new phone sensors that can be used for PHONE_DATA_YIELD Add code so new feature providers can be added for the new four sensors Add new clustering algorithm (OPTICS) for Doryab features 
Update default EPS parameter for Doryab location clustering Add clearer error message for invalid phone data yield sensors Add ALL_RESAMPLED flag and accuracy limit for location features Add FAQ about null characters in phone tables Reactivate light and wifi tests and update testing docs Fix bug when parsing Fitbit steps data Fix bugs when merging features from empty time segments Fix minor issues in the documentation v0.3.2 \u00b6 Update docker and linux instructions to use RSPM binary repo for for faster installation Update CI to create a release on a tagged push that passes the tests Clarify in DB credential configuration that we only support MySQL Add Windows installation instructions Fix bugs in the create_participants_file script Fix bugs in Fitbit data parsing. Fixed Doryab location features context of clustering. Fixed the wrong shifting while calculating distance in Doryab location features. Refactored the haversine function v0.3.1 \u00b6 Update installation docs for RAPIDS\u2019 docker container Fix example analysis use of accelerometer data in a plot Update FAQ Update minimal example documentation Minor doc updates v0.3.0 \u00b6 Update R and Python virtual environments Add GH actions CI support for tests and docker Add release and test badges to README v0.2.6 \u00b6 Fix old versions banner on nested pages v0.2.5 \u00b6 Fix docs deploy typo v0.2.4 \u00b6 Fix broken links in landing page and docs deploy v0.2.3 \u00b6 Fix participant IDS in the example analysis workflow v0.2.2 \u00b6 Fix readme link to docs v0.2.1 \u00b6 FIx link to the most recent version in the old version banner v0.2.0 \u00b6 Add new PHONE_BLUETOOTH DORYAB provider Deprecate PHONE_BLUETOOTH RAPIDS provider Fix bug in filter_data_by_segment for Python when dataset was empty Minor doc updates New FAQ item v0.1.0 \u00b6 New and more consistent docs (this website). 
The previous docs are marked as beta Consolidate configuration instructions Flexible time segments Simplify Fitbit behavioral feature extraction and documentation Sensor\u2019s configuration and output is more consistent Update visualizations to handle flexible day segments Create a RAPIDS execution script that allows re-computation of the pipeline after configuration changes Add citation guide Update virtual environment guide Update analysis workflow example Add a Code of Conduct Update Team page","title":"Change Log"},{"location":"change-log/#change-log","text":"","title":"Change Log"},{"location":"change-log/#v101","text":"Fix crash in chunk_episodes of utils.py for multi time zone data Fix crash in BT Doryab provider when the number of clusters is 2 Fix Fitbit multi time zone inference from phone data (simplify) Fix missing columns when the input for phone data yield is empty Fix wrong date time labels for event segments for multi time zone data (all labels are computed based on a single tz) Fix periodic segment crash when there are no segments to assign (only affects wday, mday, qday, or yday) Fix crash in Analysis Workflow with new suffix in segments\u2019 labels","title":"v1.0.1"},{"location":"change-log/#v100","text":"Add a new Overview page. You can extend RAPIDS with your own data streams . Data streams are data collected with other sensing apps besides AWARE (like Beiwe, mindLAMP), and stored in other data containers (databases, files) besides MySQL. Support to analyze Empatica wearable data (thanks to Joe Kim and Brinnae Bent from the DBDP ) Support to analyze AWARE data stored in CSV files and InfluxDB databases Support to analyze data collected over multiple time zones Support for sleep intraday features from the core team and also from the community (thanks to Stephen Price) Users can comment on the documentation (powered by utterances). SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT . 
Add RAPIDS new logo Move Citation and Minimal Example page to the Setup section Add config.yaml validation schema and documentation. Now it\u2019s more difficult to modify the config.yaml file with invalid values. Add new time at home Doryab location feature Add and home coordinates to the location data file so location providers can build features based on it. If you are migrating from RAPIDS 0.4.3 or older, check this guide","title":"v1.0.0"},{"location":"change-log/#v043","text":"Fix bug when any of the rows from any sensor do not belong a time segment","title":"v0.4.3"},{"location":"change-log/#v042","text":"Update battery testing Fix location processing bug when certain columns don\u2019t exist Fix HR intraday bug when minutesonZONE features were 0 Update FAQs Fix HR summary bug when restinghr=0 (ignore those rows) Fix ROG, location entropy and normalized entropy in Doryab location provider Remove sampling frequency dependance in Doryab location provider Update documentation of Doryab location provider Add new FITBIT_DATA_YIELD RAPIDS provider Deprecate Doryab circadian movement feature until it is fixed","title":"v0.4.2"},{"location":"change-log/#v041","text":"Fix bug when no error message was displayed for an empty [PHONE_DATA_YIELD][SENSORS] when resampling location data","title":"v0.4.1"},{"location":"change-log/#v040","text":"Add four new phone sensors that can be used for PHONE_DATA_YIELD Add code so new feature providers can be added for the new four sensors Add new clustering algorithm (OPTICS) for Doryab features Update default EPS parameter for Doryab location clustering Add clearer error message for invalid phone data yield sensors Add ALL_RESAMPLED flag and accuracy limit for location features Add FAQ about null characters in phone tables Reactivate light and wifi tests and update testing docs Fix bug when parsing Fitbit steps data Fix bugs when merging features from empty time segments Fix minor issues in the 
documentation","title":"v0.4.0"},{"location":"change-log/#v032","text":"Update docker and linux instructions to use RSPM binary repo for for faster installation Update CI to create a release on a tagged push that passes the tests Clarify in DB credential configuration that we only support MySQL Add Windows installation instructions Fix bugs in the create_participants_file script Fix bugs in Fitbit data parsing. Fixed Doryab location features context of clustering. Fixed the wrong shifting while calculating distance in Doryab location features. Refactored the haversine function","title":"v0.3.2"},{"location":"change-log/#v031","text":"Update installation docs for RAPIDS\u2019 docker container Fix example analysis use of accelerometer data in a plot Update FAQ Update minimal example documentation Minor doc updates","title":"v0.3.1"},{"location":"change-log/#v030","text":"Update R and Python virtual environments Add GH actions CI support for tests and docker Add release and test badges to README","title":"v0.3.0"},{"location":"change-log/#v026","text":"Fix old versions banner on nested pages","title":"v0.2.6"},{"location":"change-log/#v025","text":"Fix docs deploy typo","title":"v0.2.5"},{"location":"change-log/#v024","text":"Fix broken links in landing page and docs deploy","title":"v0.2.4"},{"location":"change-log/#v023","text":"Fix participant IDS in the example analysis workflow","title":"v0.2.3"},{"location":"change-log/#v022","text":"Fix readme link to docs","title":"v0.2.2"},{"location":"change-log/#v021","text":"FIx link to the most recent version in the old version banner","title":"v0.2.1"},{"location":"change-log/#v020","text":"Add new PHONE_BLUETOOTH DORYAB provider Deprecate PHONE_BLUETOOTH RAPIDS provider Fix bug in filter_data_by_segment for Python when dataset was empty Minor doc updates New FAQ item","title":"v0.2.0"},{"location":"change-log/#v010","text":"New and more consistent docs (this website). 
The previous docs are marked as beta Consolidate configuration instructions Flexible time segments Simplify Fitbit behavioral feature extraction and documentation Sensor\u2019s configuration and output is more consistent Update visualizations to handle flexible day segments Create a RAPIDS execution script that allows re-computation of the pipeline after configuration changes Add citation guide Update virtual environment guide Update analysis workflow example Add a Code of Conduct Update Team page","title":"v0.1.0"},{"location":"citation/","text":"Cite RAPIDS and providers \u00b6 RAPIDS and the community RAPIDS is a community effort and as such we want to continue recognizing the contributions from other researchers. Besides citing RAPIDS, we ask you to cite any of the authors listed below if you used those sensor providers in your analysis, thank you! RAPIDS \u00b6 If you used RAPIDS, please cite this paper . RAPIDS et al. citation Vega J, Li M, Aguillera K, Goel N, Joshi E, Durica KC, Kunta AR, Low CA RAPIDS: Reproducible Analysis Pipeline for Data Streams Collected with Mobile Devices JMIR Preprints. 18/08/2020:23246 DOI: 10.2196/preprints.23246 URL: https://preprints.jmir.org/preprint/23246 DBDP (all Empatica sensors) \u00b6 If you computed features using the provider [DBDP] of any of the Empatica sensors (accelerometer, heart rate, temperature, EDA, BVP, IBI, tags) cite this paper in addition to RAPIDS. Bent et al. citation Bent, B., Wang, K., Grzesiak, E., Jiang, C., Qi, Y., Jiang, Y., Cho, P., Zingler, K., Ogbeide, F.I., Zhao, A., Runge, R., Sim, I., Dunn, J. (2020). The Digital Biomarker Discovery Pipeline: An open source software platform for the development of digital biomarkers using mHealth and wearables data. Journal of Clinical and Translational Science, 1-28. doi:10.1017/cts.2020.511 Panda (accelerometer) \u00b6 If you computed accelerometer features using the provider [PHONE_ACCLEROMETER][PANDA] cite this paper in addition to RAPIDS. Panda et al. 
citation Panda N, Solsky I, Huang EJ, Lipsitz S, Pradarelli JC, Delisle M, Cusack JC, Gadd MA, Lubitz CC, Mullen JT, Qadan M, Smith BL, Specht M, Stephen AE, Tanabe KK, Gawande AA, Onnela JP, Haynes AB. Using Smartphones to Capture Novel Recovery Metrics After Cancer Surgery. JAMA Surg. 2020 Feb 1;155(2):123-129. doi: 10.1001/jamasurg.2019.4702. PMID: 31657854; PMCID: PMC6820047. Stachl (applications foreground) \u00b6 If you computed applications foreground features using the app category (genre) catalogue in [PHONE_APPLICATIONS_FOREGROUND][RAPIDS] cite this paper in addition to RAPIDS. Stachl et al. citation Clemens Stachl, Quay Au, Ramona Schoedel, Samuel D. Gosling, Gabriella M. Harari, Daniel Buschek, Sarah Theres V\u00f6lkel, Tobias Schuwerk, Michelle Oldemeier, Theresa Ullmann, Heinrich Hussmann, Bernd Bischl, Markus B\u00fchner. Proceedings of the National Academy of Sciences Jul 2020, 117 (30) 17680-17687; DOI: 10.1073/pnas.1920484117 Doryab (bluetooth) \u00b6 If you computed bluetooth features using the provider [PHONE_BLUETOOTH][DORYAB] cite this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Barnett (locations) \u00b6 If you computed locations features using the provider [PHONE_LOCATIONS][BARNETT] cite this paper and this paper in addition to RAPIDS. Barnett et al. citation Ian Barnett, Jukka-Pekka Onnela, Inferring mobility measures from GPS traces with missing data, Biostatistics, Volume 21, Issue 2, April 2020, Pages e98\u2013e112, https://doi.org/10.1093/biostatistics/kxy059 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. 
In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845 Doryab (locations) \u00b6 If you computed locations features using the provider [PHONE_LOCATIONS][DORYAB] cite this paper and this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Citation"},{"location":"citation/#cite-rapids-and-providers","text":"RAPIDS and the community RAPIDS is a community effort and as such we want to continue recognizing the contributions from other researchers. Besides citing RAPIDS, we ask you to cite any of the authors listed below if you used those sensor providers in your analysis, thank you!","title":"Cite RAPIDS and providers"},{"location":"citation/#rapids","text":"If you used RAPIDS, please cite this paper . RAPIDS et al. citation Vega J, Li M, Aguillera K, Goel N, Joshi E, Durica KC, Kunta AR, Low CA RAPIDS: Reproducible Analysis Pipeline for Data Streams Collected with Mobile Devices JMIR Preprints. 
18/08/2020:23246 DOI: 10.2196/preprints.23246 URL: https://preprints.jmir.org/preprint/23246","title":"RAPIDS"},{"location":"citation/#dbdp-all-empatica-sensors","text":"If you computed features using the provider [DBDP] of any of the Empatica sensors (accelerometer, heart rate, temperature, EDA, BVP, IBI, tags) cite this paper in addition to RAPIDS. Bent et al. citation Bent, B., Wang, K., Grzesiak, E., Jiang, C., Qi, Y., Jiang, Y., Cho, P., Zingler, K., Ogbeide, F.I., Zhao, A., Runge, R., Sim, I., Dunn, J. (2020). The Digital Biomarker Discovery Pipeline: An open source software platform for the development of digital biomarkers using mHealth and wearables data. Journal of Clinical and Translational Science, 1-28. doi:10.1017/cts.2020.511","title":"DBDP (all Empatica sensors)"},{"location":"citation/#panda-accelerometer","text":"If you computed accelerometer features using the provider [PHONE_ACCLEROMETER][PANDA] cite this paper in addition to RAPIDS. Panda et al. citation Panda N, Solsky I, Huang EJ, Lipsitz S, Pradarelli JC, Delisle M, Cusack JC, Gadd MA, Lubitz CC, Mullen JT, Qadan M, Smith BL, Specht M, Stephen AE, Tanabe KK, Gawande AA, Onnela JP, Haynes AB. Using Smartphones to Capture Novel Recovery Metrics After Cancer Surgery. JAMA Surg. 2020 Feb 1;155(2):123-129. doi: 10.1001/jamasurg.2019.4702. PMID: 31657854; PMCID: PMC6820047.","title":"Panda (accelerometer)"},{"location":"citation/#stachl-applications-foreground","text":"If you computed applications foreground features using the app category (genre) catalogue in [PHONE_APPLICATIONS_FOREGROUND][RAPIDS] cite this paper in addition to RAPIDS. Stachl et al. citation Clemens Stachl, Quay Au, Ramona Schoedel, Samuel D. Gosling, Gabriella M. Harari, Daniel Buschek, Sarah Theres V\u00f6lkel, Tobias Schuwerk, Michelle Oldemeier, Theresa Ullmann, Heinrich Hussmann, Bernd Bischl, Markus B\u00fchner. 
Proceedings of the National Academy of Sciences Jul 2020, 117 (30) 17680-17687; DOI: 10.1073/pnas.1920484117","title":"Stachl (applications foreground)"},{"location":"citation/#doryab-bluetooth","text":"If you computed bluetooth features using the provider [PHONE_BLUETOOTH][DORYAB] cite this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394","title":"Doryab (bluetooth)"},{"location":"citation/#barnett-locations","text":"If you computed locations features using the provider [PHONE_LOCATIONS][BARNETT] cite this paper and this paper in addition to RAPIDS. Barnett et al. citation Ian Barnett, Jukka-Pekka Onnela, Inferring mobility measures from GPS traces with missing data, Biostatistics, Volume 21, Issue 2, April 2020, Pages e98\u2013e112, https://doi.org/10.1093/biostatistics/kxy059 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Barnett (locations)"},{"location":"citation/#doryab-locations","text":"If you computed locations features using the provider [PHONE_LOCATIONS][DORYAB] cite this paper and this paper in addition to RAPIDS. Doryab et al. citation Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Canzian et al. citation Luca Canzian and Mirco Musolesi. 2015. 
Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp \u201815). Association for Computing Machinery, New York, NY, USA, 1293\u20131304. DOI: https://doi.org/10.1145/2750858.2805845","title":"Doryab (locations)"},{"location":"code_of_conduct/","text":"Contributor Covenant Code of Conduct \u00b6 Our Pledge \u00b6 We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
Our Standards \u00b6 Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Enforcement Responsibilities \u00b6 Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. Scope \u00b6 This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Enforcement \u00b6 Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at moshi@pitt.edu . All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. Enforcement Guidelines \u00b6 Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 1. Correction \u00b6 Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 2. Warning \u00b6 Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 3. Temporary Ban \u00b6 Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 4. 
Permanent Ban \u00b6 Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community. Attribution \u00b6 This Code of Conduct is adapted from the Contributor Covenant , version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Code of Conduct"},{"location":"code_of_conduct/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"code_of_conduct/#our-pledge","text":"We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.","title":"Our Pledge"},{"location":"code_of_conduct/#our-standards","text":"Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"code_of_conduct/#enforcement-responsibilities","text":"Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.","title":"Enforcement Responsibilities"},{"location":"code_of_conduct/#scope","text":"This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. 
Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.","title":"Scope"},{"location":"code_of_conduct/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at moshi@pitt.edu . All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident.","title":"Enforcement"},{"location":"code_of_conduct/#enforcement-guidelines","text":"Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:","title":"Enforcement Guidelines"},{"location":"code_of_conduct/#1-correction","text":"Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.","title":"1. Correction"},{"location":"code_of_conduct/#2-warning","text":"Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.","title":"2. Warning"},{"location":"code_of_conduct/#3-temporary-ban","text":"Community Impact : A serious violation of community standards, including sustained inappropriate behavior. 
Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.","title":"3. Temporary Ban"},{"location":"code_of_conduct/#4-permanent-ban","text":"Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community.","title":"4. Permanent Ban"},{"location":"code_of_conduct/#attribution","text":"This Code of Conduct is adapted from the Contributor Covenant , version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Attribution"},{"location":"common-errors/","text":"Common Errors \u00b6 Cannot connect to your MySQL server \u00b6 Problem **Error in .local ( drv, \\. .. ) :** **Failed to connect to database: Error: Can \\' t initialize character set unknown ( path: compiled \\_ in ) ** : Calls: dbConnect -> dbConnect -> .local -> .Call Execution halted [ Tue Mar 10 19 :40:15 2020 ] Error in rule download_dataset: jobid: 531 output: data/raw/p60/locations_raw.csv RuleException: CalledProcessError in line 20 of /home/ubuntu/rapids/rules/preprocessing.snakefile: Command 'set -euo pipefail; Rscript --vanilla /home/ubuntu/rapids/.snakemake/scripts/tmp_2jnvqs7.download_dataset.R' returned non-zero exit status 1 . 
File \"/home/ubuntu/rapids/rules/preprocessing.snakefile\" , line 20 , in __rule_download_dataset File \"/home/ubuntu/anaconda3/envs/moshi-env/lib/python3.7/concurrent/futures/thread.py\" , line 57 , in run Shutting down, this might take some time. Exiting because a job execution failed. Look above for error message Solution Please make sure the DATABASE_GROUP in config.yaml matches your DB credentials group in .env . Cannot start mysql in linux via brew services start mysql \u00b6 Problem Cannot start mysql in linux via brew services start mysql Solution Use mysql.server start Every time I run force the download_dataset rule all rules are executed \u00b6 Problem When running snakemake -j1 -R pull_phone_data or ./rapids -j1 -R pull_phone_data all the rules and files are re-computed Solution This is expected behavior. The advantage of using snakemake under the hood is that every time a file containing data is modified every rule that depends on that file will be re-executed to update their results. In this case, since download_dataset updates all the raw data, and you are forcing the rule with the flag -R every single rule that depends on those raw files will be executed. Error Table XXX doesn't exist while running the download_phone_data or download_fitbit_data rule. \u00b6 Problem Error in .local ( conn, statement, ... ) : could not run statement: Table 'db_name.table_name' doesn ' t exist Calls: colnames ... .local -> dbSendQuery -> dbSendQuery -> .local -> .Call Execution halted Solution Please make sure the sensors listed in [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] and the [CONTAINER] of each sensor you activated in config.yaml match your database tables or files. 
How do I install RAPIDS on Ubuntu 16.04 \u00b6 Solution Install dependencies (Homebrew - if not installed): sudo apt-get install libmariadb-client-lgpl-dev libxml2-dev libssl-dev Install brew for linux and add the following line to ~/.bashrc : export PATH=$HOME/.linuxbrew/bin:$PATH source ~/.bashrc Install MySQL brew install mysql brew services start mysql Install R, pandoc and rmarkdown: brew install r brew install gcc@6 (needed due to this bug ) HOMEBREW_CC=gcc-6 brew install pandoc Install miniconda using these instructions Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install R packages and virtual environment: snakemake renv_install snakemake renv_init snakemake renv_restore This step could take several minutes to complete. Please be patient and let it run until completion. mysql.h cannot be found \u00b6 Problem -------------------------- [ ERROR MESSAGE ] ---------------------------- <stdin>:1:10: fatal error: mysql.h: No such file or directory compilation terminated. ----------------------------------------------------------------------- ERROR: configuration failed for package 'RMySQL' Solution sudo apt install libmariadbclient-dev No package libcurl found \u00b6 Problem libcurl cannot be found Solution Install libcurl sudo apt install libcurl4-openssl-dev Configuration failed because openssl was not found. \u00b6 Problem openssl cannot be found Solution Install openssl sudo apt install libssl-dev Configuration failed because libxml-2.0 was not found \u00b6 Problem libxml-2.0 cannot be found Solution Install libxml-2.0 sudo apt install libxml2-dev SSL connection error when running RAPIDS \u00b6 Problem You are getting the following error message when running RAPIDS: Error: Failed to connect: SSL connection error: error:1425F102:SSL routines:ssl_choose_client_version:unsupported protocol. 
Solution This is a bug in Ubuntu 20.04 when trying to connect to an old MySQL server with MySQL client 8.0. You should get the same error message if you try to connect from the command line. There you can add the option --ssl-mode=DISABLED but we can't do this from the R connector. If you can't update your server, the quickest solution would be to import your database to another server or to a local environment. Alternatively, you could replace mysql-client and libmysqlclient-dev with mariadb-client and libmariadbclient-dev and reinstall renv. More info about this issue here DB_TABLES key not found \u00b6 Problem If you get the following error KeyError in line 43 of preprocessing.smk: 'PHONE_SENSORS' , it means that the indentation of the key [PHONE_SENSORS] is not matching the other child elements of PHONE_VALID_SENSED_BINS Solution You need to add or remove any leading whitespaces as needed on that line. PHONE_VALID_SENSED_BINS : COMPUTE : False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features BIN_SIZE : &bin_size 5 # (in minutes) PHONE_SENSORS : [] Error while updating your conda environment in Ubuntu \u00b6 Problem You get the following error: CondaMultiError: CondaVerificationError: The package for tk located at /home/ubuntu/miniconda2/pkgs/tk-8.6.9-hed695b0_1003 appears to be corrupted. The path 'include/mysqlStubs.h' specified in the package manifest cannot be found. ClobberError: This transaction has incompatible packages due to a shared path. 
packages: conda-forge/linux-64::llvm-openmp-10.0.0-hc9558a2_0, anaconda/linux-64::intel-openmp-2019.4-243 path: 'lib/libiomp5.so' Solution Reinstall conda Embedded nul in string \u00b6 Problem You get the following error when downloading sensor data: Error in result_fetch ( res@ptr, n = n ) : embedded nul in string: Solution This problem is due to the way RMariaDB handles a mismatch between data types in R and MySQL (see this issue ). Since it seems this problem won\u2019t be handled by RMariaDB , you have two options: Remove the null character from the conflicting table cell(s). You can adapt the following query on a MySQL server 8.0 or older update YOUR_TABLE set YOUR_COLUMN = regexp_replace ( YOUR_COLUMN , '\\0' , '' ); If it\u2019s not feasible to modify your data you can try swapping RMariaDB with RMySQL . Just keep in mind you might have problems connecting to modern MySQL servers running in Linux: Add RMySQL to the renv environment by running the following command in a terminal open on RAPIDS root folder R -e 'renv::install(\"RMySQL\")' Go to src/data/streams/pull_phone_data.R or src/data/streams/pull_fitbit_data.R and replace library(RMariaDB) with library(RMySQL) In the same file(s) replace dbEngine <- dbConnect(MariaDB(), default.file = \"./.env\", group = group) with dbEngine <- dbConnect(MySQL(), default.file = \"./.env\", group = group) There is no package called RMariaDB \u00b6 Problem You get the following error when executing RAPIDS: Error in library ( RMariaDB ) : there is no package called 'RMariaDB' Execution halted Solution In RAPIDS v0.1.0 we replaced RMySQL R package with RMariaDB , this error means your R virtual environment is out of date, to update it run snakemake -j1 renv_restore Unrecognized output timezone \u201cAmerica/New_York\u201d \u00b6 Problem When running RAPIDS with R 4.0.3 on MacOS on M1, lubridate may throw an error associated with the timezone. 
Error in C_force_tz ( time, tz = tzone, roll ) : CCTZ: Unrecognized output timezone: \"America/New_York\" Calls: get_timestamp_filter ... .parse_date_time -> .strptime -> force_tz -> C_force_tz Solution This is because the R timezone library is not set. Please add Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) to the file activate.R in the renv folder to set the timezone library. For further details on how to test if TZDIR is properly set, please refer to https://github.com/tidyverse/lubridate/issues/928#issuecomment-720059233 . Unimplemented MAX_NO_FIELD_TYPES \u00b6 Problem You get the following error when downloading Fitbit data: Error: Unimplemented MAX_NO_FIELD_TYPES Execution halted Solution At the moment RMariaDB cannot handle MySQL columns of JSON type. Change the type of your Fitbit data column to longtext (note that the content will not change and will still be a JSON object just interpreted as a string). Running RAPIDS on Apple Silicon M1 Mac \u00b6 Problem You get the following error when installing pandoc or running rapids: MoSHI/rapids/renv/staging/1/00LOCK-KernSmooth/00new/KernSmooth/libs/KernSmooth.so: mach-0, but wrong architecture Solution As of Feb 2020 in M1 macs, R needs to be installed via brew under Rosetta (x86 arch) due to some incompatibility with selected R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. x86 homebrew should be installed in /usr/local/bin/brew , you can check which brew you are using by typing which brew . Then use x86 homebrew to install R and restore RAPIDS packages ( renv_restore ).","title":"Common Errors"},{"location":"common-errors/#common-errors","text":"","title":"Common Errors"},{"location":"common-errors/#cannot-connect-to-your-mysql-server","text":"Problem **Error in .local ( drv, \\. .. 
) :** **Failed to connect to database: Error: Can \\' t initialize character set unknown ( path: compiled \\_ in ) ** : Calls: dbConnect -> dbConnect -> .local -> .Call Execution halted [ Tue Mar 10 19 :40:15 2020 ] Error in rule download_dataset: jobid: 531 output: data/raw/p60/locations_raw.csv RuleException: CalledProcessError in line 20 of /home/ubuntu/rapids/rules/preprocessing.snakefile: Command 'set -euo pipefail; Rscript --vanilla /home/ubuntu/rapids/.snakemake/scripts/tmp_2jnvqs7.download_dataset.R' returned non-zero exit status 1 . File \"/home/ubuntu/rapids/rules/preprocessing.snakefile\" , line 20 , in __rule_download_dataset File \"/home/ubuntu/anaconda3/envs/moshi-env/lib/python3.7/concurrent/futures/thread.py\" , line 57 , in run Shutting down, this might take some time. Exiting because a job execution failed. Look above for error message Solution Please make sure the DATABASE_GROUP in config.yaml matches your DB credentials group in .env .","title":"Cannot connect to your MySQL server"},{"location":"common-errors/#cannot-start-mysql-in-linux-via-brew-services-start-mysql","text":"Problem Cannot start mysql in linux via brew services start mysql Solution Use mysql.server start","title":"Cannot start mysql in linux via brew services start mysql"},{"location":"common-errors/#every-time-i-run-force-the-download_dataset-rule-all-rules-are-executed","text":"Problem When running snakemake -j1 -R pull_phone_data or ./rapids -j1 -R pull_phone_data all the rules and files are re-computed Solution This is expected behavior. The advantage of using snakemake under the hood is that every time a file containing data is modified every rule that depends on that file will be re-executed to update their results. 
In this case, since download_dataset updates all the raw data, and you are forcing the rule with the flag -R every single rule that depends on those raw files will be executed.","title":"Every time I run force the download_dataset rule all rules are executed"},{"location":"common-errors/#error-table-xxx-doesnt-exist-while-running-the-download_phone_data-or-download_fitbit_data-rule","text":"Problem Error in .local ( conn, statement, ... ) : could not run statement: Table 'db_name.table_name' doesn ' t exist Calls: colnames ... .local -> dbSendQuery -> dbSendQuery -> .local -> .Call Execution halted Solution Please make sure the sensors listed in [PHONE_VALID_SENSED_BINS][PHONE_SENSORS] and the [CONTAINER] of each sensor you activated in config.yaml match your database tables or files.","title":"Error Table XXX doesn't exist while running the download_phone_data or download_fitbit_data rule."},{"location":"common-errors/#how-do-i-install-rapids-on-ubuntu-1604","text":"Solution Install dependencies (Homebrew - if not installed): sudo apt-get install libmariadb-client-lgpl-dev libxml2-dev libssl-dev Install brew for linux and add the following line to ~/.bashrc : export PATH=$HOME/.linuxbrew/bin:$PATH source ~/.bashrc Install MySQL brew install mysql brew services start mysql Install R, pandoc and rmarkdown: brew install r brew install gcc@6 (needed due to this bug ) HOMEBREW_CC=gcc-6 brew install pandoc Install miniconda using these instructions Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install R packages and virtual environment: snakemake renv_install snakemake renv_init snakemake renv_restore This step could take several minutes to complete. 
Please be patient and let it run until completion.","title":"How do I install RAPIDS on Ubuntu 16.04"},{"location":"common-errors/#mysqlh-cannot-be-found","text":"Problem -------------------------- [ ERROR MESSAGE ] ---------------------------- <stdin>:1:10: fatal error: mysql.h: No such file or directory compilation terminated. ----------------------------------------------------------------------- ERROR: configuration failed for package 'RMySQL' Solution sudo apt install libmariadbclient-dev","title":"mysql.h cannot be found"},{"location":"common-errors/#no-package-libcurl-found","text":"Problem libcurl cannot be found Solution Install libcurl sudo apt install libcurl4-openssl-dev","title":"No package libcurl found"},{"location":"common-errors/#configuration-failed-because-openssl-was-not-found","text":"Problem openssl cannot be found Solution Install openssl sudo apt install libssl-dev","title":"Configuration failed because openssl was not found."},{"location":"common-errors/#configuration-failed-because-libxml-20-was-not-found","text":"Problem libxml-2.0 cannot be found Solution Install libxml-2.0 sudo apt install libxml2-dev","title":"Configuration failed because libxml-2.0 was not found"},{"location":"common-errors/#ssl-connection-error-when-running-rapids","text":"Problem You are getting the following error message when running RAPIDS: Error: Failed to connect: SSL connection error: error:1425F102:SSL routines:ssl_choose_client_version:unsupported protocol. Solution This is a bug in Ubuntu 20.04 when trying to connect to an old MySQL server with MySQL client 8.0. You should get the same error message if you try to connect from the command line. There you can add the option --ssl-mode=DISABLED but we can't do this from the R connector. If you can't update your server, the quickest solution would be to import your database to another server or to a local environment. 
Alternatively, you could replace mysql-client and libmysqlclient-dev with mariadb-client and libmariadbclient-dev and reinstall renv. More info about this issue here","title":"SSL connection error when running RAPIDS"},{"location":"common-errors/#db_tables-key-not-found","text":"Problem If you get the following error KeyError in line 43 of preprocessing.smk: 'PHONE_SENSORS' , it means that the indentation of the key [PHONE_SENSORS] is not matching the other child elements of PHONE_VALID_SENSED_BINS Solution You need to add or remove any leading whitespaces as needed on that line. PHONE_VALID_SENSED_BINS : COMPUTE : False # This flag is automatically ignored (set to True) if you are extracting PHONE_VALID_SENSED_DAYS or screen or Barnett's location features BIN_SIZE : &bin_size 5 # (in minutes) PHONE_SENSORS : []","title":"DB_TABLES key not found"},{"location":"common-errors/#error-while-updating-your-conda-environment-in-ubuntu","text":"Problem You get the following error: CondaMultiError: CondaVerificationError: The package for tk located at /home/ubuntu/miniconda2/pkgs/tk-8.6.9-hed695b0_1003 appears to be corrupted. The path 'include/mysqlStubs.h' specified in the package manifest cannot be found. ClobberError: This transaction has incompatible packages due to a shared path. packages: conda-forge/linux-64::llvm-openmp-10.0.0-hc9558a2_0, anaconda/linux-64::intel-openmp-2019.4-243 path: 'lib/libiomp5.so' Solution Reinstall conda","title":"Error while updating your conda environment in Ubuntu"},{"location":"common-errors/#embedded-nul-in-string","text":"Problem You get the following error when downloading sensor data: Error in result_fetch ( res@ptr, n = n ) : embedded nul in string: Solution This problem is due to the way RMariaDB handles a mismatch between data types in R and MySQL (see this issue ). Since it seems this problem won\u2019t be handled by RMariaDB , you have two options: Remove the null character from the conflicting table cell(s). 
You can adapt the following query on a MySQL server 8.0 or older update YOUR_TABLE set YOUR_COLUMN = regexp_replace ( YOUR_COLUMN , '\\0' , '' ); If it\u2019s not feasible to modify your data you can try swapping RMariaDB with RMySQL . Just keep in mind you might have problems connecting to modern MySQL servers running in Linux: Add RMySQL to the renv environment by running the following command in a terminal open on RAPIDS root folder R -e 'renv::install(\"RMySQL\")' Go to src/data/streams/pull_phone_data.R or src/data/streams/pull_fitbit_data.R and replace library(RMariaDB) with library(RMySQL) In the same file(s) replace dbEngine <- dbConnect(MariaDB(), default.file = \"./.env\", group = group) with dbEngine <- dbConnect(MySQL(), default.file = \"./.env\", group = group)","title":"Embedded nul in string"},{"location":"common-errors/#there-is-no-package-called-rmariadb","text":"Problem You get the following error when executing RAPIDS: Error in library ( RMariaDB ) : there is no package called 'RMariaDB' Execution halted Solution In RAPIDS v0.1.0 we replaced RMySQL R package with RMariaDB , this error means your R virtual environment is out of date, to update it run snakemake -j1 renv_restore","title":"There is no package called RMariaDB"},{"location":"common-errors/#unrecognized-output-timezone-americanew_york","text":"Problem When running RAPIDS with R 4.0.3 on MacOS on M1, lubridate may throw an error associated with the timezone. Error in C_force_tz ( time, tz = tzone, roll ) : CCTZ: Unrecognized output timezone: \"America/New_York\" Calls: get_timestamp_filter ... .parse_date_time -> .strptime -> force_tz -> C_force_tz Solution This is because the R timezone library is not set. Please add Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) to the file activate.R in the renv folder to set the timezone library. 
For further details on how to test if TZDIR is properly set, please refer to https://github.com/tidyverse/lubridate/issues/928#issuecomment-720059233 .","title":"Unrecognized output timezone \"America/New_York\""},{"location":"common-errors/#unimplemented-max_no_field_types","text":"Problem You get the following error when downloading Fitbit data: Error: Unimplemented MAX_NO_FIELD_TYPES Execution halted Solution At the moment RMariaDB cannot handle MySQL columns of JSON type. Change the type of your Fitbit data column to longtext (note that the content will not change and will still be a JSON object just interpreted as a string).","title":"Unimplemented MAX_NO_FIELD_TYPES"},{"location":"common-errors/#running-rapids-on-apple-silicon-m1-mac","text":"Problem You get the following error when installing pandoc or running rapids: MoSHI/rapids/renv/staging/1/00LOCK-KernSmooth/00new/KernSmooth/libs/KernSmooth.so: mach-0, but wrong architecture Solution As of Feb 2020 in M1 macs, R needs to be installed via brew under Rosetta (x86 arch) due to some incompatibility with selected R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. x86 homebrew should be installed in /usr/local/bin/brew , you can check which brew you are using by typing which brew . Then use x86 homebrew to install R and restore RAPIDS packages ( renv_restore ).","title":"Running RAPIDS on Apple Silicon M1 Mac"},{"location":"migrating-from-old-versions/","text":"Migration guides \u00b6 Migrating from RAPIDS 0.4.x or older \u00b6 There are four actions that you need to take if you were using RAPIDS 0.4.3 or older ( before Feb 9 th , 2021 ): Check the new Overview page Check the new Overview page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. 
Deploy RAPIDS in a new folder Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder) Activate your conda environment Install renv again snakemake -j1 renv_install (for Ubuntu take advantage of the platform specific R renv instructions ) Restore renv packages snakemake -j1 renv_restore (for Ubuntu take advantage of the platform specific R renv instructions ) Move your participant files pxx.yaml to the new folder Move your time segment files to the new folder Move your .env file to the new folder Migrate your .env file to the new credentials.yaml format The .env file is not used anymore, the same credential groups are stored in credentials.yaml , migrate your .env file by running: python tools/update_format_env.py Reconfigure your config.yaml Reconfigure your config.yaml file by hand (don\u2019t copy and paste the old one). Some keys and values changed but the defaults should be compatible with the things you know from RAPIDS 0.x (see below). The most relevant changes to RAPIDS that you need to know about are: We introduced the concept of data streams RAPIDS abstracts sensor data logged by different devices, platforms and stored in different data containers as data streams . The default data stream for PHONE is aware_mysql , and the default for FITBIT is fitbitjson_mysql . This is compatible with the old functionality (AWARE and JSON Fitbit data stored in MySQL). These values are set in [PHONE_DATA_STREAMS][USE] and [FITBIT_DATA_STREAMS][USE] . You can add new data stream formats (sensing apps) and containers (database engines, file types, etc.). If you were processing your Fitbit data either in JSON or plain text (parsed) format, and it was stored in MySQL or CSV files, the changes that you made to your raw data will be compatible. Just choose fitbitjson_mysql , fitbitparsed_mysql , fitbitjson_csv , fitbitparsed_csv accordingly and set it in [FITBIT_DATA_STREAMS][USE] . 
In the future, you will not have to change your raw data; you will be able to just change column mappings/values in the data stream\u2019s format.yaml file. We introduced multiple time zones You can now process data from participants that visited multiple time zones. The default is still a single time zone (America/New_York). See how to handle multiple time zones The keyword multiple is now infer When processing data from smartphones, RAPIDS allows you to infer the OS of a smartphone by using the keyword multiple in the [PLATFORM] key of participant files. Now RAPIDS uses infer instead of multiple Nonetheless, multiple still works for backward compatibility. A global DATABASE_GROUP does not exist anymore There is no global DATABASE_GROUP anymore. Each data stream that needs credentials to connect to a database has its own DATABASE_GROUP config key . The groups are defined in credentials.yaml instead of the .env . [DEVICE_SENSOR][TABLE] is now [DEVICE_SENSOR][CONTAINER] We renamed the keys [DEVICE_SENSOR][TABLE] to [DEVICE_SENSOR][CONTAINER] to reflect that, with the introduction of data streams, they can point to a database table, file, or any other data container. Creating participant files from the AWARE_DEVICE_TABLE is deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. 
We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions to create participant files from CSV files : SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT The attributes SCR_SCRIPT and SRC_LANGUAGE of every sensor PROVIDER are replaced by SRC_SCRIPT . SRC_SCRIPT is a relative path from the RAPIDS root folder to that provider\u2019s feature script. We did this to simplify and clarify where the features scripts are stored. There are no actions to take unless you created your own feature provider; update it with your feature script path. Migrating from RAPIDS beta \u00b6 If you were relying on the old docs and the most recent version of RAPIDS you are working with is from or before Oct 13, 2020 you are using the beta version of RAPIDS. 
You can start using the RAPIDS 0.1.0 right away, just take into account the following: Deploy RAPIDS in a new folder Install a new copy of RAPIDS (the R and Python virtual environments didn\u2019t change so the cached versions will be reused) Make sure you don\u2019t skip a new Installation step to give execution permissions to the RAPIDS script: chmod +x rapids Move your old .env file Move your participant files Migrate your participant files You can migrate your old participant files to the new YAML format: python tools/update_format_participant_files.py Follow the new Configuration guide Follow the new Configuration guide Learn more about the new way to run RAPIDS Get familiar with the new way of Executing RAPIDS","title":"Migrating from an old version"},{"location":"migrating-from-old-versions/#migration-guides","text":"","title":"Migration guides"},{"location":"migrating-from-old-versions/#migrating-from-rapids-04x-or-older","text":"There are four actions that you need to take if you were using RAPIDS 0.4.3 or older ( before Feb 9 th , 2021 ): Check the new Overview page Check the new Overview page. Hopefully, it is a better overview of RAPIDS and provides answers to Frequently Asked Questions. 
Deploy RAPIDS in a new folder Clone RAPIDS 1.x in a new folder (do not pull the updates in your current folder) Activate your conda environment Install renv again snakemake -j1 renv_install (for Ubuntu take advantage of the platform specific R renv instructions ) Restore renv packages snakemake -j1 renv_restore (for Ubuntu take advantage of the platform specific R renv instructions ) Move your participant files pxx.yaml to the new folder Move your time segment files to the new folder Move your .env file to the new folder Migrate your .env file to the new credentials.yaml format The .env file is not used anymore, the same credential groups are stored in credentials.yaml , migrate your .env file by running: python tools/update_format_env.py Reconfigure your config.yaml Reconfigure your config.yaml file by hand (don\u2019t copy and paste the old one). Some keys and values changed but the defaults should be compatible with the things you know from RAPIDS 0.x (see below). The most relevant changes to RAPIDS that you need to know about are: We introduced the concept of data streams RAPIDS abstracts sensor data logged by different devices, platforms and stored in different data containers as data streams . The default data stream for PHONE is aware_mysql , and the default for FITBIT is fitbitjson_mysql . This is compatible with the old functionality (AWARE and JSON Fitbit data stored in MySQL). These values are set in [PHONE_DATA_STREAMS][USE] and [FITBIT_DATA_STREAMS][USE] . You can add new data stream formats (sensing apps) and containers (database engines, file types, etc.). If you were processing your Fitbit data either in JSON or plain text (parsed) format, and it was stored in MySQL or CSV files, the changes that you made to your raw data will be compatible. Just choose fitbitjson_mysql , fitbitparsed_mysql , fitbitjson_csv , fitbitparsed_csv accordingly and set it in [FITBIT_DATA_STREAMS][USE] . 
In the future, you will not have to change your raw data; you will be able to just change column mappings/values in the data stream\u2019s format.yaml file. We introduced multiple time zones You can now process data from participants that visited multiple time zones. The default is still a single time zone (America/New_York). See how to handle multiple time zones The keyword multiple is now infer When processing data from smartphones, RAPIDS allows you to infer the OS of a smartphone by using the keyword multiple in the [PLATFORM] key of participant files. Now RAPIDS uses infer instead of multiple Nonetheless, multiple still works for backward compatibility. A global DATABASE_GROUP does not exist anymore There is no global DATABASE_GROUP anymore. Each data stream that needs credentials to connect to a database has its own DATABASE_GROUP config key . The groups are defined in credentials.yaml instead of the .env . [DEVICE_SENSOR][TABLE] is now [DEVICE_SENSOR][CONTAINER] We renamed the keys [DEVICE_SENSOR][TABLE] to [DEVICE_SENSOR][CONTAINER] to reflect that, with the introduction of data streams, they can point to a database table, file, or any other data container. Creating participant files from the AWARE_DEVICE_TABLE is deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. 
We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions to create participant files from CSV files : SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; SCR_SCRIPT and SRC_LANGUAGE are replaced by SRC_SCRIPT The attributes SCR_SCRIPT and SRC_LANGUAGE of every sensor PROVIDER are replaced by SRC_SCRIPT . SRC_SCRIPT is a relative path from the RAPIDS root folder to that provider\u2019s feature script. We did this to simplify and clarify where the features scripts are stored. There are no actions to take unless you created your own feature provider; update it with your feature script path.","title":"Migrating from RAPIDS 0.4.x or older"},{"location":"migrating-from-old-versions/#migrating-from-rapids-beta","text":"If you were relying on the old docs and the most recent version of RAPIDS you are working with is from or before Oct 13, 2020 you are using the beta version of RAPIDS. 
You can start using the RAPIDS 0.1.0 right away, just take into account the following: Deploy RAPIDS in a new folder Install a new copy of RAPIDS (the R and Python virtual environments didn\u2019t change so the cached versions will be reused) Make sure you don\u2019t skip a new Installation step to give execution permissions to the RAPIDS script: chmod +x rapids Move your old .env file Move your participant files Migrate your participant files You can migrate your old participant files to the new YAML format: python tools/update_format_participant_files.py Follow the new Configuration guide Follow the new Configuration guide Learn more about the new way to run RAPIDS Get familiar with the new way of Executing RAPIDS","title":"Migrating from RAPIDS beta"},{"location":"team/","text":"RAPIDS Team \u00b6 If you are interested in contributing feel free to submit a pull request or contact us. Core Team \u00b6 Julio Vega (Designer and Lead Developer) \u00b6 About Julio Vega is a postdoctoral associate at the Mobile Sensing + Health Institute. He is interested in personalized methodologies to monitor chronic conditions that affect daily human behavior using mobile and wearable data. vegaju at upmc . edu Personal Website Meng Li \u00b6 About Meng Li received her Master of Science degree in Information Science from the University of Pittsburgh. She is interested in applying machine learning algorithms to the medical field. lim11 at upmc . edu Linkedin Profile Github Profile Abhineeth Reddy Kunta \u00b6 About Abhineeth Reddy Kunta is a Senior Software Engineer with the Mobile Sensing + Health Institute. He is experienced in software development and specializes in building solutions using machine learning. Abhineeth likes exploring ways to leverage technology in advancing medicine and education. Previously he worked as a Computer Programmer at Georgia Department of Public Health. He has a master\u2019s degree in Computer Science from George Mason University. 
Kwesi Aguillera \u00b6 About Kwesi Aguillera is currently in his first year at the University of Pittsburgh pursuing a Master of Sciences in Information Science specializing in Big Data Analytics. He received his Bachelor of Science degree in Computer Science and Management from the University of the West Indies. Kwesi considers himself a full stack developer and looks forward to applying this knowledge to big data analysis. Linkedin Profile Echhit Joshi \u00b6 About Echhit Joshi is a Masters student at the School of Computing and Information at University of Pittsburgh. His areas of interest are Machine/Deep Learning, Data Mining, and Analytics. Linkedin Profile Nicolas Leo \u00b6 About Nicolas is a rising senior studying computer science at the University of Pittsburgh. His academic interests include databases, machine learning, and application development. After completing his undergraduate degree, he plans to attend graduate school for a MS in Computer Science with a focus on Intelligent Systems. Nikunj Goel \u00b6 About Nik is a graduate student at the University of Pittsburgh pursuing Master of Science in Information Science. He earned his Bachelor of Technology degree in Information Technology from India. He is a Data Enthusiast and is passionate about finding the meaning out of raw data. In the long term, his goal is to create a breakthrough in Data Science and Deep Learning. Linkedin Profile Community Contributors \u00b6 Agam Kumar \u00b6 About Agam is a junior at Carnegie Mellon University studying Statistics and Machine Learning and pursuing an additional major in Computer Science. He is a member of the Data Science team in the Health and Human Performance Lab at CMU and has keen interests in software development and data science. His research interests include ML applications in medicine. Linkedin Profile Github Profile Yasaman S. 
Sefidgar \u00b6 About Linkedin Profile Joe Kim \u00b6 About Personal Website Brinnae Bent \u00b6 About Personal Website Stephen Price \u00b6 About Carnegie Mellon University Neil Singh \u00b6 About University of Virginia Advisors \u00b6 Afsaneh Doryab \u00b6 About Personal Website Carissa Low \u00b6 About Profile","title":"Team"},{"location":"team/#rapids-team","text":"If you are interested in contributing feel free to submit a pull request or contact us.","title":"RAPIDS Team"},{"location":"team/#core-team","text":"","title":"Core Team"},{"location":"team/#julio-vega-designer-and-lead-developer","text":"About Julio Vega is a postdoctoral associate at the Mobile Sensing + Health Institute. He is interested in personalized methodologies to monitor chronic conditions that affect daily human behavior using mobile and wearable data. vegaju at upmc . edu Personal Website","title":"Julio Vega (Designer and Lead Developer)"},{"location":"team/#meng-li","text":"About Meng Li received her Master of Science degree in Information Science from the University of Pittsburgh. She is interested in applying machine learning algorithms to the medical field. lim11 at upmc . edu Linkedin Profile Github Profile","title":"Meng Li"},{"location":"team/#abhineeth-reddy-kunta","text":"About Abhineeth Reddy Kunta is a Senior Software Engineer with the Mobile Sensing + Health Institute. He is experienced in software development and specializes in building solutions using machine learning. Abhineeth likes exploring ways to leverage technology in advancing medicine and education. Previously he worked as a Computer Programmer at Georgia Department of Public Health. He has a master\u2019s degree in Computer Science from George Mason University.","title":"Abhineeth Reddy Kunta"},{"location":"team/#kwesi-aguillera","text":"About Kwesi Aguillera is currently in his first year at the University of Pittsburgh pursuing a Master of Sciences in Information Science specializing in Big Data Analytics. 
He received his Bachelor of Science degree in Computer Science and Management from the University of the West Indies. Kwesi considers himself a full stack developer and looks forward to applying this knowledge to big data analysis. Linkedin Profile","title":"Kwesi Aguillera"},{"location":"team/#echhit-joshi","text":"About Echhit Joshi is a Masters student at the School of Computing and Information at University of Pittsburgh. His areas of interest are Machine/Deep Learning, Data Mining, and Analytics. Linkedin Profile","title":"Echhit Joshi"},{"location":"team/#nicolas-leo","text":"About Nicolas is a rising senior studying computer science at the University of Pittsburgh. His academic interests include databases, machine learning, and application development. After completing his undergraduate degree, he plans to attend graduate school for a MS in Computer Science with a focus on Intelligent Systems.","title":"Nicolas Leo"},{"location":"team/#nikunj-goel","text":"About Nik is a graduate student at the University of Pittsburgh pursuing Master of Science in Information Science. He earned his Bachelor of Technology degree in Information Technology from India. He is a Data Enthusiast and is passionate about finding the meaning out of raw data. In the long term, his goal is to create a breakthrough in Data Science and Deep Learning. Linkedin Profile","title":"Nikunj Goel"},{"location":"team/#community-contributors","text":"","title":"Community Contributors"},{"location":"team/#agam-kumar","text":"About Agam is a junior at Carnegie Mellon University studying Statistics and Machine Learning and pursuing an additional major in Computer Science. He is a member of the Data Science team in the Health and Human Performance Lab at CMU and has keen interests in software development and data science. His research interests include ML applications in medicine. 
Linkedin Profile Github Profile","title":"Agam Kumar"},{"location":"team/#yasaman-s-sefidgar","text":"About Linkedin Profile","title":"Yasaman S. Sefidgar"},{"location":"team/#joe-kim","text":"About Personal Website","title":"Joe Kim"},{"location":"team/#brinnae-bent","text":"About Personal Website","title":"Brinnae Bent"},{"location":"team/#stephen-price","text":"About Carnegie Mellon University","title":"Stephen Price"},{"location":"team/#neil-singh","text":"About University of Virginia","title":"Neil Singh"},{"location":"team/#advisors","text":"","title":"Advisors"},{"location":"team/#afsaneh-doryab","text":"About Personal Website","title":"Afsaneh Doryab"},{"location":"team/#carissa-low","text":"About Profile","title":"Carissa Low"},{"location":"datastreams/add-new-data-streams/","text":"Add New Data Streams \u00b6 A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . RAPIDS is agnostic to data streams\u2019 formats and container; see the Data Streams Introduction for a list of supported streams. A container is queried with an R or Python script that connects to the database, API or file where your stream\u2019s raw data is stored. A format is described using a format.yaml file that specifies how to map and mutate your stream\u2019s raw data to match the data and format RAPIDS needs. The most common cases when you would want to implement a new data stream are: You collected data with a mobile sensing app RAPIDS does not support yet. For example, Beiwe data stored in MySQL. You will need to define a new format file and a new container script. You collected data with a mobile sensing app RAPIDS supports, but this data is stored in a container that RAPIDS can\u2019t connect to yet. For example, AWARE data stored in PostgreSQL. In this case, you can reuse the format file of the aware_mysql stream, but you will need to implement a new container script. 
Hint Both the container.[R|py] and the format.yaml are stored in ./src/data/streams/[stream_name] where [stream_name] can be aware_mysql for example. Implement a Container \u00b6 The container script of a data stream can be implemented in R (strongly recommended) or python. This script must have two functions if you are implementing a stream for phone data or one function otherwise. The script can contain other auxiliary functions. First of all, add any parameters your script might need in config.yaml under (device)_DATA_STREAMS . These parameters will be available in the stream_parameters argument of the one or two functions you implement. For example, if you are adding support for Beiwe data stored in PostgreSQL and your container needs a set of credentials to connect to a database, your new data stream configuration would be: PHONE_DATA_STREAMS : USE : aware_python # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP beiwe_postgresql : DATABASE_GROUP : MY_GROUP # users define this group (user, password, host, etc.) in credentials.yaml Then implement one or both of the following functions: pull_data This function returns the data columns for a specific sensor and participant. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] sensor_container The value set by the user in any [DEVICE_SENSOR][CONTAINER] key of config.yaml . It can be a table, file path, or whatever data source you want to support that contains the data from a single sensor for all participants . For example, [PHONE_ACCELEROMETER][CONTAINER] device The device id that you need to get the data for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid columns A list of the columns that you need to get from sensor_container . 
You specify these columns in your stream\u2019s format.yaml Example This is the pull_data function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. pull_data <- function ( stream_parameters , device , sensor_container , columns ){ # get_db_engine is an auxiliary function not shown here for brevity but can be found in src/data/streams/aware_mysql/container.R dbEngine <- get_db_engine ( stream_parameters $ DATABASE_GROUP ) query <- paste0 ( \"SELECT \" , paste ( columns , collapse = \",\" ), \" FROM \" , sensor_container , \" WHERE device_id = '\" , device , \"'\" ) # Letting the user know what we are doing message ( paste0 ( \"Executing the following query to download data: \" , query )) sensor_data <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( sensor_data ) == 0 ) warning ( paste ( \"The device '\" , device , \"' did not have data in \" , sensor_container )) return ( sensor_data ) } infer_device_os Warning This function is only necessary for phone data streams. RAPIDS allows users to use the keyword infer (previously multiple ) to automatically infer the mobile operating system a phone was running. If you have a way to infer the OS of a device id, implement this function. For example, for AWARE data we use the aware_device table. If you don\u2019t have a way to infer the OS, call stop(\"Error Message\") so other users know they can\u2019t use infer or the inference failed, and they have to assign the OS manually in the participant file. This function returns the operating system ( android or ios ) for a specific phone device id. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] device The device id that you need to infer the OS for (this is set by the user in the participant files ). 
For example, in AWARE this device id is a uuid Example This is the infer_device_os function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. infer_device_os <- function ( stream_parameters , device ){ # get_db_engine is an auxiliary function not shown here for brevity but can be found in src/data/streams/aware_mysql/container.R group <- stream_parameters $ DATABASE_GROUP dbEngine <- dbConnect ( MariaDB (), default.file = \"./.env\" , group = group ) query <- paste0 ( \"SELECT device_id,brand FROM aware_device WHERE device_id = '\" , device , \"'\" ) message ( paste0 ( \"Executing the following query to infer phone OS: \" , query )) os <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( os ) > 0 ) return ( os %>% mutate ( os = ifelse ( brand == \"iPhone\" , \"ios\" , \"android\" )) %>% pull ( os )) else stop ( paste ( \"We cannot infer the OS of the following device id because it does not exist in the aware_device table:\" , device )) return ( os ) } Implement a Format \u00b6 A format file format.yaml describes the mapping between your stream\u2019s raw data and the data that RAPIDS needs. This file has a section per sensor (e.g. PHONE_ACCELEROMETER ), and each section has two attributes (keys): RAPIDS_COLUMN_MAPPINGS are mappings between the columns RAPIDS needs and the columns your raw data already has. The reserved keyword FLAG_TO_MUTATE flags columns that RAPIDS requires but that are not initially present in your container (database, CSV file). These columns have to be created by your mutation scripts. MUTATION . Sometimes your raw data needs to be transformed to match the format RAPIDS can handle (including creating columns marked as FLAG_TO_MUTATE ) COLUMN_MAPPINGS are mappings between the columns a mutation SCRIPT needs and the columns your raw data has. SCRIPTS are a collection of R or Python scripts that transform one or more raw data columns into the format RAPIDS needs. 
Hint [RAPIDS_COLUMN_MAPPINGS] and [MUTATE][COLUMN_MAPPINGS] have a key (left-hand side string) and a value (right-hand side string). The values are the names used to pull columns from a container (e.g., columns in a database table). All values are renamed to their keys in lower case. The renamed columns are sent to every mutation script within the data argument, and the final output is the input RAPIDS processes further. For example, let\u2019s assume we are implementing beiwe_mysql and defining the following format for PHONE_FAKESENSOR : PHONE_FAKESENSOR : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID MAGNITUDE_SQUARED : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : MAGNITUDE : beiwe_value SCRIPTS : - src/data/streams/mutations/phone/square_magnitude.py RAPIDS will: Download beiwe_timestamp , beiwe_deviceID , and beiwe_value from the container of beiwe_mysql (MySQL DB) Rename these columns to timestamp , device_id , and magnitude , respectively. Execute square_magnitude.py with a data frame as an argument containing the renamed columns. This script will square magnitude and rename it to magnitude_squared Verify the data frame returned by square_magnitude.py has the columns RAPIDS needs timestamp , device_id , and magnitude_squared . Use this data frame as the input to be processed in the pipeline. Note that although RAPIDS_COLUMN_MAPPINGS and [MUTATE][COLUMN_MAPPINGS] keys are in capital letters for readability (e.g. MAGNITUDE_SQUARED ), the names of the final columns you mutate in your scripts should be lower case. Let\u2019s explain this column mapping in more depth with examples. Name mapping \u00b6 The mapping for some sensors is straightforward. For example, accelerometer data most of the time has a timestamp, three axes (x,y,z), and a device id that produced it. AWARE and a different sensing app like Beiwe likely logged accelerometer data in the same way but with different column names. 
In this case, we only need to match Beiwe data columns to RAPIDS columns one-to-one: PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : beiwe_x DOUBLE_VALUES_1 : beiwe_y DOUBLE_VALUES_2 : beiwe_z MUTATE : COLUMN_MAPPINGS : SCRIPTS : # it's ok if this is empty Value mapping \u00b6 For some sensors, we need to map column names and values. For example, screen data has ON and OFF events; let\u2019s suppose Beiwe represents an ON event with the number 1, but RAPIDS identifies ON events with the number 2 . In this case, we need to mutate the raw data coming from Beiwe and replace all 1 s with 2 s. We do this by listing one or more R or Python scripts in MUTATION_SCRIPTS that will be executed in order. We usually store all mutation scripts under src/data/streams/mutations/[device]/[platform]/ and they can be reused across data streams. PHONE_SCREEN : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID EVENT : beiwe_event MUTATE : COLUMN_MAPPINGS : SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_screen_map.py Hint A MUTATION_SCRIPT can also be used to clean/preprocess your data before extracting behavioral features. A mutation script has to have a main function that receives two arguments, data and stream_parameters . The stream_parameters argument contains the config.yaml key/values of your data stream (this is the same argument that your container.[py|R] script receives, see Implement a Container ). python Example of a python mutation script import pandas as pd def main ( data , stream_parameters ): # mutate data return ( data ) R Example of a R mutation script source ( \"renv/activate.R\" ) # needed to use RAPIDS renv environment library ( dplyr ) main <- function ( data , stream_parameters ){ # mutate data return ( data ) } Complex mapping \u00b6 Sometimes, your raw data doesn\u2019t even have the same columns RAPIDS expects for a sensor. 
For example, let\u2019s pretend Beiwe stores PHONE_ACCELEROMETER axis data in a single column called acc_col instead of three. You have to create a MUTATION_SCRIPT to split acc_col into three columns x , y , and z . For this, you mark the three axes columns RAPIDS needs in [RAPIDS_COLUMN_MAPPINGS] with the word FLAG_TO_MUTATE , map acc_col in [MUTATION][COLUMN_MAPPINGS] , and list a Python script under [MUTATION][SCRIPTS] with the code to split acc_col . See an example below. RAPIDS expects that every column mapped as FLAG_TO_MUTATE will be generated by your mutation script, so it won\u2019t try to retrieve them from your container (database, CSV file, etc.). In our example, acc_col will be fetched from the stream\u2019s container and renamed to JOINED_AXES because beiwe_split_acc.py will split it into double_values_0 , double_values_1 , and double_values_2 . PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : FLAG_TO_MUTATE DOUBLE_VALUES_1 : FLAG_TO_MUTATE DOUBLE_VALUES_2 : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : JOINED_AXES : acc_col SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_split_acc.py This is a draft of beiwe_split_acc.py MUTATION_SCRIPT : import pandas as pd def main ( data , stream_parameters ): # data has the acc_col # split acc_col into three columns: double_values_0, double_values_1, double_values_2 to match RAPIDS format # remove acc_col since we don't need it anymore return ( data ) OS complex mapping \u00b6 There is a special case for a complex mapping scenario for smartphone data streams. The Android and iOS sensor APIs return data in different formats for certain sensors (like screen, activity recognition, battery, among others). In case you didn\u2019t notice, the examples we have used so far are grouped under an ANDROID key, which means they will be applied to data collected by Android phones. 
Additionally, each sensor has an IOS key for a similar purpose. We use the complex mapping described above to transform iOS data into an Android format (it\u2019s always iOS to Android and any new phone data stream must do the same). For example, this is the format.yaml key for PHONE_ACTIVITY_RECOGNITION . Note that the ANDROID mapping is simple (one-to-one) but the IOS mapping is complex with three FLAG_TO_MUTATE columns, two [MUTATE][COLUMN_MAPPINGS] mappings, and one [MUTATION][SCRIPT] . PHONE_ACTIVITY_RECOGNITION : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : activity_type ACTIVITY_NAME : activity_name CONFIDENCE : confidence MUTATION : COLUMN_MAPPINGS : SCRIPTS : IOS : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : FLAG_TO_MUTATE ACTIVITY_NAME : FLAG_TO_MUTATE CONFIDENCE : FLAG_TO_MUTATE MUTATION : COLUMN_MAPPINGS : ACTIVITIES : activities CONFIDENCE : confidence SCRIPTS : - \"src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R\" Example activity_recogniton_ios_unification.R In this MUTATION_SCRIPT we create ACTIVITY_NAME and ACTIVITY_TYPE based on activities , and map confidence iOS values to Android values. 
source ( \"renv/activate.R\" ) library ( \"dplyr\" , warn.conflicts = F ) library ( stringr ) clean_ios_activity_column <- function ( ios_gar ){ ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = '(\"|\\\\[|\\\\])' , replacement = \"\" )) existent_multiple_activities <- ios_gar %>% filter ( str_detect ( activities , \",\" )) %>% group_by ( activities ) %>% summarise ( mutiple_activities = unique ( activities ), .groups = \"drop_last\" ) %>% pull ( mutiple_activities ) known_multiple_activities <- c ( \"stationary,automotive\" ) unkown_multiple_actvities <- setdiff ( existent_multiple_activities , known_multiple_activities ) if ( length ( unkown_multiple_actvities ) > 0 ){ stop ( paste0 ( \"There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: \" , unkown_multiple_actvities )) } ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = \"stationary,automotive\" , replacement = \"automotive\" )) return ( ios_gar ) } unify_ios_activity_recognition <- function ( ios_gar ){ # We only need to unify Google Activity Recognition data for iOS # discard rows where activities column is blank ios_gar <- ios_gar [ - which ( ios_gar $ activities == \"\" ), ] # clean \"activities\" column of ios_gar ios_gar <- clean_ios_activity_column ( ios_gar ) # make it compatible with android version: generate \"activity_name\" and \"activity_type\" columns ios_gar <- ios_gar %>% mutate ( activity_name = case_when ( activities == \"automotive\" ~ \"in_vehicle\" , activities == \"cycling\" ~ \"on_bicycle\" , activities == \"walking\" ~ \"walking\" , activities == \"running\" ~ \"running\" , activities == \"stationary\" ~ \"still\" ), activity_type = case_when ( activities == \"automotive\" ~ 0 , activities == \"cycling\" ~ 1 , activities == \"walking\" ~ 7 , activities == \"running\" ~ 8 , activities == \"stationary\" ~ 3 , activities == \"unknown\" ~ 4 ), confidence = case_when ( 
confidence == 0 ~ 0 , confidence == 1 ~ 50 , confidence == 2 ~ 100 ) ) %>% select ( - activities ) return ( ios_gar ) } main <- function ( data , stream_parameters ){ return ( unify_ios_activity_recognition ( data , stream_parameters )) }","title":"Add New Data Streams"},{"location":"datastreams/add-new-data-streams/#add-new-data-streams","text":"A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . RAPIDS is agnostic to data streams\u2019 formats and container; see the Data Streams Introduction for a list of supported streams. A container is queried with an R or Python script that connects to the database, API or file where your stream\u2019s raw data is stored. A format is described using a format.yaml file that specifies how to map and mutate your stream\u2019s raw data to match the data and format RAPIDS needs. The most common cases when you would want to implement a new data stream are: You collected data with a mobile sensing app RAPIDS does not support yet. For example, Beiwe data stored in MySQL. You will need to define a new format file and a new container script. You collected data with a mobile sensing app RAPIDS supports, but this data is stored in a container that RAPIDS can\u2019t connect to yet. For example, AWARE data stored in PostgreSQL. In this case, you can reuse the format file of the aware_mysql stream, but you will need to implement a new container script. Hint Both the container.[R|py] and the format.yaml are stored in ./src/data/streams/[stream_name] where [stream_name] can be aware_mysql for example.","title":"Add New Data Streams"},{"location":"datastreams/add-new-data-streams/#implement-a-container","text":"The container script of a data stream can be implemented in R (strongly recommended) or python. This script must have two functions if you are implementing a stream for phone data or one function otherwise. 
The script can contain other auxiliary functions. First of all, add any parameters your script might need in config.yaml under (device)_DATA_STREAMS . These parameters will be available in the stream_parameters argument of the one or two functions you implement. For example, if you are adding support for Beiwe data stored in PostgreSQL and your container needs a set of credentials to connect to a database, your new data stream configuration would be: PHONE_DATA_STREAMS : USE : aware_python # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP beiwe_postgresql : DATABASE_GROUP : MY_GROUP # users define this group (user, password, host, etc.) in credentials.yaml Then implement one or both of the following functions: pull_data This function returns the data columns for a specific sensor and participant. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] sensor_container The value set by the user in any [DEVICE_SENSOR][CONTAINER] key of config.yaml . It can be a table, file path, or whatever data source you want to support that contains the data from a single sensor for all participants . For example, [PHONE_ACCELEROMETER][CONTAINER] device The device id that you need to get the data for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid columns A list of the columns that you need to get from sensor_container . You specify these columns in your stream\u2019s format.yaml Example This is the pull_data function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. 
pull_data <- function ( stream_parameters , device , sensor_container , columns ){ # get_db_engine is an auxiliary function not shown here for brevity but can be found in src/data/streams/aware_mysql/container.R dbEngine <- get_db_engine ( stream_parameters $ DATABASE_GROUP ) query <- paste0 ( \"SELECT \" , paste ( columns , collapse = \",\" ), \" FROM \" , sensor_container , \" WHERE device_id = '\" , device , \"'\" ) # Letting the user know what we are doing message ( paste0 ( \"Executing the following query to download data: \" , query )) sensor_data <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( sensor_data ) == 0 ) warning ( paste ( \"The device '\" , device , \"' did not have data in \" , sensor_container )) return ( sensor_data ) } infer_device_os Warning This function is only necessary for phone data streams. RAPIDS allows users to use the keyword infer (previously multiple ) to automatically infer the mobile Operating System a phone was running. If you have a way to infer the OS of a device id, implement this function. For example, for AWARE data we use the aware_device table. If you don\u2019t have a way to infer the OS, call stop(\"Error Message\") so other users know they can\u2019t use infer or the inference failed, and they have to assign the OS manually in the participant file. This function returns the operating system ( android or ios ) for a specific phone device id. It has the following parameters: Param Description stream_parameters Any parameters (keys/values) set by the user in any [DEVICE_DATA_STREAMS][stream_name] key of config.yaml . For example, [DATABASE_GROUP] inside [FITBIT_DATA_STREAMS][fitbitjson_mysql] device The device id that you need to infer the OS for (this is set by the user in the participant files ). For example, in AWARE this device id is a uuid Example This is the infer_device_os function we implemented for aware_mysql . Note that we can message , warn or stop the user during execution. 
infer_device_os <- function ( stream_parameters , device ){ # get_db_engine is an auxiliary function not shown here for brevity but can be found in src/data/streams/aware_mysql/container.R group <- stream_parameters $ DATABASE_GROUP dbEngine <- dbConnect ( MariaDB (), default.file = \"./.env\" , group = group ) query <- paste0 ( \"SELECT device_id,brand FROM aware_device WHERE device_id = '\" , device , \"'\" ) message ( paste0 ( \"Executing the following query to infer phone OS: \" , query )) os <- dbGetQuery ( dbEngine , query ) dbDisconnect ( dbEngine ) if ( nrow ( os ) > 0 ) return ( os %>% mutate ( os = ifelse ( brand == \"iPhone\" , \"ios\" , \"android\" )) %>% pull ( os )) else stop ( paste ( \"We cannot infer the OS of the following device id because it does not exist in the aware_device table:\" , device )) return ( os ) }","title":"Implement a Container"},{"location":"datastreams/add-new-data-streams/#implement-a-format","text":"A format file format.yaml describes the mapping between your stream\u2019s raw data and the data that RAPIDS needs. This file has a section per sensor (e.g. PHONE_ACCELEROMETER ), and each section has two attributes (keys): RAPIDS_COLUMN_MAPPINGS are mappings between the columns RAPIDS needs and the columns your raw data already has. The reserved keyword FLAG_TO_MUTATE flags columns that RAPIDS requires but that are not initially present in your container (database, CSV file). These columns have to be created by your mutation scripts. MUTATION . Sometimes your raw data needs to be transformed to match the format RAPIDS can handle (including creating columns marked as FLAG_TO_MUTATE ) COLUMN_MAPPINGS are mappings between the columns a mutation SCRIPT needs and the columns your raw data has. SCRIPTS are a collection of R or Python scripts that transform one or more raw data columns into the format RAPIDS needs. 
Hint [RAPIDS_COLUMN_MAPPINGS] and [MUTATE][COLUMN_MAPPINGS] have a key (left-hand side string) and a value (right-hand side string). The values are the names used to pull columns from a container (e.g., columns in a database table). All values are renamed to their keys in lower case. The renamed columns are sent to every mutation script within the data argument, and the final output is the input RAPIDS processes further. For example, let\u2019s assume we are implementing beiwe_mysql and defining the following format for PHONE_FAKESENSOR : PHONE_FAKESENSOR : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID MAGNITUDE_SQUARED : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : MAGNITUDE : beiwe_value SCRIPTS : - src/data/streams/mutations/phone/square_magnitude.py RAPIDS will: Download beiwe_timestamp , beiwe_deviceID , and beiwe_value from the container of beiwe_mysql (MySQL DB) Rename these columns to timestamp , device_id , and magnitude , respectively. Execute square_magnitude.py with a data frame as an argument containing the renamed columns. This script will square magnitude and rename it to magnitude_squared Verify the data frame returned by square_magnitude.py has the columns RAPIDS needs timestamp , device_id , and magnitude_squared . Use this data frame as the input to be processed in the pipeline. Note that although RAPIDS_COLUMN_MAPPINGS and [MUTATE][COLUMN_MAPPINGS] keys are in capital letters for readability (e.g. MAGNITUDE_SQUARED ), the names of the final columns you mutate in your scripts should be lower case. Let\u2019s explain in more depth this column mapping with examples.","title":"Implement a Format"},{"location":"datastreams/add-new-data-streams/#name-mapping","text":"The mapping for some sensors is straightforward. For example, accelerometer data most of the time has a timestamp, three axes (x,y,z), and a device id that produced it. 
AWARE and a different sensing app like Beiwe likely logged accelerometer data in the same way but with different column names. In this case, we only need to match Beiwe data columns to RAPIDS columns one-to-one: PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : beiwe_x DOUBLE_VALUES_1 : beiwe_y DOUBLE_VALUES_2 : beiwe_z MUTATE : COLUMN_MAPPINGS : SCRIPTS : # it's ok if this is empty","title":"Name mapping"},{"location":"datastreams/add-new-data-streams/#value-mapping","text":"For some sensors, we need to map column names and values. For example, screen data has ON and OFF events; let\u2019s suppose Beiwe represents an ON event with the number 1, but RAPIDS identifies ON events with the number 2 . In this case, we need to mutate the raw data coming from Beiwe and replace all 1 s with 2 s. We do this by listing one or more R or Python scripts in MUTATION_SCRIPTS that will be executed in order. We usually store all mutation scripts under src/data/streams/mutations/[device]/[platform]/ and they can be reused across data streams. PHONE_SCREEN : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID EVENT : beiwe_event MUTATE : COLUMN_MAPPINGS : SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_screen_map.py Hint A MUTATION_SCRIPT can also be used to clean/preprocess your data before extracting behavioral features. A mutation script has to have a main function that receives two arguments, data and stream_parameters . The stream_parameters argument contains the config.yaml key/values of your data stream (this is the same argument that your container.[py|R] script receives, see Implement a Container ). 
python Example of a python mutation script import pandas as pd def main ( data , stream_parameters ): # mutate data return ( data ) R Example of a R mutation script source ( \"renv/activate.R\" ) # needed to use RAPIDS renv environment library ( dplyr ) main <- function ( data , stream_parameters ){ # mutate data return ( data ) }","title":"Value mapping"},{"location":"datastreams/add-new-data-streams/#complex-mapping","text":"Sometimes, your raw data doesn\u2019t even have the same columns RAPIDS expects for a sensor. For example, let\u2019s pretend Beiwe stores PHONE_ACCELEROMETER axis data in a single column called acc_col instead of three. You have to create a MUTATION_SCRIPT to split acc_col into three columns x , y , and z . For this, you mark the three axes columns RAPIDS needs in [RAPIDS_COLUMN_MAPPINGS] with the word FLAG_TO_MUTATE , map acc_col in [MUTATION][COLUMN_MAPPINGS] , and list a Python script under [MUTATION][SCRIPTS] with the code to split acc_col . See an example below. RAPIDS expects that every column mapped as FLAG_TO_MUTATE will be generated by your mutation script, so it won\u2019t try to retrieve them from your container (database, CSV file, etc.). In our example, acc_col will be fetched from the stream\u2019s container and renamed to JOINED_AXES because beiwe_split_acc.py will split it into double_values_0 , double_values_1 , and double_values_2 . 
PHONE_ACCELEROMETER : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : beiwe_timestamp DEVICE_ID : beiwe_deviceID DOUBLE_VALUES_0 : FLAG_TO_MUTATE DOUBLE_VALUES_1 : FLAG_TO_MUTATE DOUBLE_VALUES_2 : FLAG_TO_MUTATE MUTATE : COLUMN_MAPPINGS : JOINED_AXES : acc_col SCRIPTS : - src/data/streams/mutations/phone/beiwe/beiwe_split_acc.py This is a draft of beiwe_split_acc.py MUTATION_SCRIPT : import pandas as pd def main ( data , stream_parameters ): # data has the acc_col # split acc_col into three columns: double_values_0, double_values_1, double_values_2 to match RAPIDS format # remove acc_col since we don't need it anymore return ( data )","title":"Complex mapping"},{"location":"datastreams/add-new-data-streams/#os-complex-mapping","text":"There is a special case for a complex mapping scenario for smartphone data streams. The Android and iOS sensor APIs return data in different formats for certain sensors (like screen, activity recognition, battery, among others). In case you didn\u2019t notice, the examples we have used so far are grouped under an ANDROID key, which means they will be applied to data collected by Android phones. Additionally, each sensor has an IOS key for a similar purpose. We use the complex mapping described above to transform iOS data into an Android format (it\u2019s always iOS to Android and any new phone data stream must do the same). For example, this is the format.yaml key for PHONE_ACTIVITY_RECOGNITION . Note that the ANDROID mapping is simple (one-to-one) but the IOS mapping is complex with three FLAG_TO_MUTATE columns, two [MUTATE][COLUMN_MAPPINGS] mappings, and one [MUTATION][SCRIPT] . 
PHONE_ACTIVITY_RECOGNITION : ANDROID : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : activity_type ACTIVITY_NAME : activity_name CONFIDENCE : confidence MUTATION : COLUMN_MAPPINGS : SCRIPTS : IOS : RAPIDS_COLUMN_MAPPINGS : TIMESTAMP : timestamp DEVICE_ID : device_id ACTIVITY_TYPE : FLAG_TO_MUTATE ACTIVITY_NAME : FLAG_TO_MUTATE CONFIDENCE : FLAG_TO_MUTATE MUTATION : COLUMN_MAPPINGS : ACTIVITIES : activities CONFIDENCE : confidence SCRIPTS : - \"src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R\" Example activity_recogniton_ios_unification.R In this MUTATION_SCRIPT we create ACTIVITY_NAME and ACTIVITY_TYPE based on activities , and map confidence iOS values to Android values. source ( \"renv/activate.R\" ) library ( \"dplyr\" , warn.conflicts = F ) library ( stringr ) clean_ios_activity_column <- function ( ios_gar ){ ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = '(\"|\\\\[|\\\\])' , replacement = \"\" )) existent_multiple_activities <- ios_gar %>% filter ( str_detect ( activities , \",\" )) %>% group_by ( activities ) %>% summarise ( mutiple_activities = unique ( activities ), .groups = \"drop_last\" ) %>% pull ( mutiple_activities ) known_multiple_activities <- c ( \"stationary,automotive\" ) unkown_multiple_actvities <- setdiff ( existent_multiple_activities , known_multiple_activities ) if ( length ( unkown_multiple_actvities ) > 0 ){ stop ( paste0 ( \"There are unkwown combinations of ios activities, you need to implement the decision of the ones to keep: \" , unkown_multiple_actvities )) } ios_gar <- ios_gar %>% mutate ( activities = str_replace_all ( activities , pattern = \"stationary,automotive\" , replacement = \"automotive\" )) return ( ios_gar ) } unify_ios_activity_recognition <- function ( ios_gar ){ # We only need to unify Google Activity Recognition data for iOS # discard rows where activities column is blank ios_gar <- ios_gar [ - which ( ios_gar 
$ activities == \"\" ), ] # clean \"activities\" column of ios_gar ios_gar <- clean_ios_activity_column ( ios_gar ) # make it compatible with android version: generate \"activity_name\" and \"activity_type\" columns ios_gar <- ios_gar %>% mutate ( activity_name = case_when ( activities == \"automotive\" ~ \"in_vehicle\" , activities == \"cycling\" ~ \"on_bicycle\" , activities == \"walking\" ~ \"walking\" , activities == \"running\" ~ \"running\" , activities == \"stationary\" ~ \"still\" ), activity_type = case_when ( activities == \"automotive\" ~ 0 , activities == \"cycling\" ~ 1 , activities == \"walking\" ~ 7 , activities == \"running\" ~ 8 , activities == \"stationary\" ~ 3 , activities == \"unknown\" ~ 4 ), confidence = case_when ( confidence == 0 ~ 0 , confidence == 1 ~ 50 , confidence == 2 ~ 100 ) ) %>% select ( - activities ) return ( ios_gar ) } main <- function ( data , stream_parameters ){ return ( unify_ios_activity_recognition ( data , stream_parameters )) }","title":"OS complex mapping"},{"location":"datastreams/aware-csv/","text":"aware_csv \u00b6 This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in CSV files. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . 
See examples in the CSV files inside rapids_example_csv.zip Example of a valid CSV file \"_id\",\"timestamp\",\"device_id\",\"activities\",\"confidence\",\"stationary\",\"walking\",\"running\",\"automotive\",\"cycling\",\"unknown\",\"label\" 1,1587528000000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"\" 2,1587528060000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 3,1587528120000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 4,1587528180000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 5,1587528240000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 6,1587528300000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 7,1587528360000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" Container \u00b6 A CSV file per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_csv/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_csv"},{"location":"datastreams/aware-csv/#aware_csv","text":"This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in CSV files. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . See examples in the CSV files inside rapids_example_csv.zip Example of a valid CSV file \"_id\",\"timestamp\",\"device_id\",\"activities\",\"confidence\",\"stationary\",\"walking\",\"running\",\"automotive\",\"cycling\",\"unknown\",\"label\" 1,1587528000000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"\" 2,1587528060000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 3,1587528120000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 4,1587528180000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 5,1587528240000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 6,1587528300000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\" 7,1587528360000,\"13dbc8a3-dae3-4834-823a-4bc96a7d459d\",\"[\\\"stationary\\\"]\",2,1,0,0,0,0,0,\"supplement\"","title":"aware_csv"},{"location":"datastreams/aware-csv/#container","text":"A CSV file per sensor, each containing 
the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_csv/container.R","title":"Container"},{"location":"datastreams/aware-csv/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set 
ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. 
We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. 
PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/aware-influxdb/","text":"aware_influxdb (beta) \u00b6 Warning This data stream is being released in beta while we test it thoroughly. This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in an InfluxDB database. Container \u00b6 An InfluxDB database with a table per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_influxdb/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_influxdb (beta)"},{"location":"datastreams/aware-influxdb/#aware_influxdb-beta","text":"Warning This data stream is being released in beta while we test it thoroughly. This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in an InfluxDB database.","title":"aware_influxdb (beta)"},{"location":"datastreams/aware-influxdb/#container","text":"An InfluxDB database with a table per sensor, each containing the data for all participants. The script to connect and download data from this container is at: src/data/streams/aware_influxdb/container.R","title":"Container"},{"location":"datastreams/aware-influxdb/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/aware-mysql/","text":"aware_mysql \u00b6 This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in a MySQL database. Container \u00b6 A MySQL database with a table per sensor, each containing the data for all participants. This is the default database created by the old PHP AWARE server (as opposed to the new JavaScript Micro server). The script to connect and download data from this container is at: src/data/streams/aware_mysql/container.R Format \u00b6 If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"aware_mysql"},{"location":"datastreams/aware-mysql/#aware_mysql","text":"This data stream handles iOS and Android sensor data collected with the AWARE Framework and stored in a MySQL database.","title":"aware_mysql"},{"location":"datastreams/aware-mysql/#container","text":"A MySQL database with a table per sensor, each containing the data for all participants. This is the default database created by the old PHP AWARE server (as opposed to the new JavaScript Micro server). The script to connect and download data from this container is at: src/data/streams/aware_mysql/container.R","title":"Container"},{"location":"datastreams/aware-mysql/#format","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_mysql/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. 
PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT 
error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). 
PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There 
is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Format"},{"location":"datastreams/data-streams-introduction/","text":"Data Streams Introduction \u00b6 A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Similarly, smartphone data collected with Beiwe will have a different format and could be stored in a container like a PostgreSQL database or a CSV file. If you want to process a data stream using RAPIDS, make sure that your data is stored in a supported format and container (see table below). If RAPIDS doesn\u2019t support your data stream yet (e.g. Beiwe data stored in PostgreSQL, or AWARE data stored in SQLite), you can always implement a new data stream . If it\u2019s something you think other people might be interested in, we will be happy to include your new data stream in RAPIDS, so get in touch! Hint Currently, you can add new data streams for smartphones, Fitbit, and Empatica devices. If you need RAPIDS to process data from other devices , like Oura Rings or Actigraph wearables, get in touch. It is a more complicated process that could take a couple of days to implement for someone familiar with R or Python, but we would be happy to work on it together. 
For reference, these are the data streams we currently support: Data Stream Device Format Container Docs aware_mysql Phone AWARE app MySQL link aware_csv Phone AWARE app CSV files link aware_influxdb (beta) Phone AWARE app InfluxDB link fitbitjson_mysql Fitbit JSON (per Fitbit\u2019s API ) MySQL link fitbitjson_csv Fitbit JSON (per Fitbit\u2019s API ) CSV files link fitbitparsed_mysql Fitbit Parsed (parsed API data) MySQL link fitbitparsed_csv Fitbit Parsed (parsed API data) CSV files link empatica_zip Empatica E4 Connect ZIP files link","title":"Introduction"},{"location":"datastreams/data-streams-introduction/#data-streams-introduction","text":"A data stream is a set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Similarly, smartphone data collected with Beiwe will have a different format and could be stored in a container like a PostgreSQL database or a CSV file. If you want to process a data stream using RAPIDS, make sure that your data is stored in a supported format and container (see table below). If RAPIDS doesn\u2019t support your data stream yet (e.g. Beiwe data stored in PostgreSQL, or AWARE data stored in SQLite), you can always implement a new data stream . If it\u2019s something you think other people might be interested in, we will be happy to include your new data stream in RAPIDS, so get in touch! Hint Currently, you can add new data streams for smartphones, Fitbit, and Empatica devices. If you need RAPIDS to process data from other devices , like Oura Rings or Actigraph wearables, get in touch. It is a more complicated process that could take a couple of days to implement for someone familiar with R or Python, but we would be happy to work on it together. 
For reference, these are the data streams we currently support: Data Stream Device Format Container Docs aware_mysql Phone AWARE app MySQL link aware_csv Phone AWARE app CSV files link aware_influxdb (beta) Phone AWARE app InfluxDB link fitbitjson_mysql Fitbit JSON (per Fitbit\u2019s API ) MySQL link fitbitjson_csv Fitbit JSON (per Fitbit\u2019s API ) CSV files link fitbitparsed_mysql Fitbit Parsed (parsed API data) MySQL link fitbitparsed_csv Fitbit Parsed (parsed API data) CSV files link empatica_zip Empatica E4 Connect ZIP files link","title":"Data Streams Introduction"},{"location":"datastreams/empatica-zip/","text":"empatica_zip \u00b6 This data stream handles Empatica sensor data downloaded as zip files using the E4 Connect . Container \u00b6 You need to create a subfolder for every participant named after their device id inside the folder specified by [EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER] . You can add one or more Empatica zip files to any subfolder. The script to connect and download data from this container is at: src/data/streams/empatica_zip/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Empatica sensors . This file is at: src/data/streams/empatica_zip/format.yaml All columns are mutated from the raw data in the zip files so you don\u2019t need to modify any column mappings. 
EMPATICA_ACCELEROMETER RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_HEARTRATE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id HEARTRATE heartrate MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_TEMPERATURE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TEMPERATURE temperature MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ELECTRODERMAL_ACTIVITY electrodermal_activity MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_BLOOD_VOLUME_PULSE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BLOOD_VOLUME_PULSE blood_volume_pulse MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_INTER_BEAT_INTERVAL RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id INTER_BEAT_INTERVAL inter_beat_interval MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_EMPATICA_TAGS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TAGS tags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None)","title":"empatica_zip"},{"location":"datastreams/empatica-zip/#empatica_zip","text":"This data stream handles Empatica sensor data downloaded as zip files using the E4 Connect .","title":"empatica_zip"},{"location":"datastreams/empatica-zip/#container","text":"You need to create a subfolder for every participant named after their device id inside the folder specified by [EMPATICA_DATA_STREAMS][empatica_zipfiles][FOLDER] . You can add one or more Empatica zip files to any subfolder. 
The script to connect and download data from this container is at: src/data/streams/empatica_zip/container.R","title":"Container"},{"location":"datastreams/empatica-zip/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Empatica sensors . This file is at: src/data/streams/empatica_zip/format.yaml All columns are mutated from the raw data in the zip files so you don\u2019t need to modify any column mappings. EMPATICA_ACCELEROMETER RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_HEARTRATE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id HEARTRATE heartrate MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_TEMPERATURE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TEMPERATURE temperature MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ELECTRODERMAL_ACTIVITY electrodermal_activity MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_BLOOD_VOLUME_PULSE RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BLOOD_VOLUME_PULSE blood_volume_pulse MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_INTER_BEAT_INTERVAL RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id INTER_BEAT_INTERVAL inter_beat_interval MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) EMPATICA_EMPATICA_TAGS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id TAGS tags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None)","title":"Format"},{"location":"datastreams/fitbitjson-csv/","text":"fitbitjson_csv \u00b6 This 
data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your sensor data in a CSV file, RAPIDS can process it. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"timestamp\",\"device_id\",\"label\",\"fitbit_id\",\"fitbit_data_type\",\"fitbit_data\" 1587614400000,\"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",\"5S\",\"5ZKN9B\",\"steps\",\"{\\\"activities-steps\\\":[{\\\"dateTime\\\":\\\"2020-04-23\\\",\\\"value\\\":\\\"7881\\\"}]}\" Container \u00b6 The container should be a CSV file per Fitbit sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_csv/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note The Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , count_awakenings , count_restless , and duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"fitbitjson_csv"},{"location":"datastreams/fitbitjson-csv/#fitbitjson_csv","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a CSV file, RAPIDS can process it. Warning The CSV files have to use , as the separator, \\ as the escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"timestamp\",\"device_id\",\"label\",\"fitbit_id\",\"fitbit_data_type\",\"fitbit_data\" 1587614400000,\"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",\"5S\",\"5ZKN9B\",\"steps\",\"{\\\"activities-steps\\\":[{\\\"dateTime\\\":\\\"2020-04-23\\\",\\\"value\\\":\\\"7881\\\"}]}\"","title":"fitbitjson_csv"},{"location":"datastreams/fitbitjson-csv/#container","text":"The container should be a CSV file per Fitbit sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_csv/container.R","title":"Container"},{"location":"datastreams/fitbitjson-csv/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note The Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , count_awakenings , count_restless , and duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Format"},{"location":"datastreams/fitbitjson-mysql/","text":"fitbitjson_mysql \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a MySQL database, RAPIDS can process it. Container \u00b6 The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_mysql/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_mysql/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , count_awakenings , count_restless , and duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"fitbitjson_mysql"},{"location":"datastreams/fitbitjson-mysql/#fitbitjson_mysql","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your sensor data in a MySQL database, RAPIDS can process it.","title":"fitbitjson_mysql"},{"location":"datastreams/fitbitjson-mysql/#container","text":"The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitjson_mysql/container.R","title":"Container"},{"location":"datastreams/fitbitjson-mysql/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_mysql/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , count_awakenings , count_restless , and duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Format"},{"location":"datastreams/fitbitparsed-csv/","text":"fitbitparsed_csv \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. 
Once you have your parsed sensor data in a CSV file, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"device_id\",\"heartrate\",\"heartrate_zone\",\"local_date_time\",\"timestamp\" \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:00:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:01:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",67,\"outofrange\",\"2020-04-23 00:02:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:03:00\",0 Container \u00b6 The container should be a CSV file per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_csv/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_csv/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just have in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"fitbitparsed_csv"},{"location":"datastreams/fitbitparsed-csv/#fitbitparsed_csv","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a CSV file. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a CSV file, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Warning The CSV files have to use , as separator, \\ as escape character (do not escape \" with \"\" ), and wrap any string columns with \" . Example of a valid CSV file \"device_id\",\"heartrate\",\"heartrate_zone\",\"local_date_time\",\"timestamp\" \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:00:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:01:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",67,\"outofrange\",\"2020-04-23 00:02:00\",0 \"a748ee1a-1d0b-4ae9-9074-279a2b6ba524\",69,\"outofrange\",\"2020-04-23 00:03:00\",0","title":"fitbitparsed_csv"},{"location":"datastreams/fitbitparsed-csv/#container","text":"The container should be a CSV file per sensor, each containing all participants\u2019 data. 
The script to connect and download data from this container is at: src/data/streams/fitbitparsed_csv/container.R","title":"Container"},{"location":"datastreams/fitbitparsed-csv/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_csv/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just have in mind that some features will be empty if some of these columns are empty. FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Format"},{"location":"datastreams/fitbitparsed-mysql/","text":"fitbitparsed_mysql \u00b6 This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a MySQL database, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream. Container \u00b6 The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_mysql/container.R Format \u00b6 The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just have in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"fitbitparsed_mysql"},{"location":"datastreams/fitbitparsed-mysql/#fitbitparsed_mysql","text":"This data stream handles Fitbit sensor data downloaded using the Fitbit Web API , parsed , and stored in a MySQL database. Please note that RAPIDS cannot query the API directly; you need to use other available tools or implement your own. Once you have your parsed sensor data in a MySQL database, RAPIDS can process it. What is the difference between JSON and plain data streams Most people will only need fitbitjson_* because they downloaded and stored their data directly from Fitbit\u2019s API. However, if, for some reason, you don\u2019t have access to that JSON data and instead only have the parsed data (columns and rows), you can use this data stream.","title":"fitbitparsed_mysql"},{"location":"datastreams/fitbitparsed-mysql/#container","text":"The container should be a MySQL database with a table per sensor, each containing all participants\u2019 data. The script to connect and download data from this container is at: src/data/streams/fitbitparsed_mysql/container.R","title":"Container"},{"location":"datastreams/fitbitparsed-mysql/#format","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just have in mind that some features will be empty if some of these columns are empty. 
FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Format"},{"location":"datastreams/mandatory-empatica-format/","text":"Mandatory Empatica Format \u00b6 This is a description of the format RAPIDS needs to process data for the following Empatica sensors. EMPATICA_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration EMPATICA_HEARTRATE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate EMPATICA_TEMPERATURE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TEMPERATURE temperature EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device ELECTRODERMAL_ACTIVITY electrical conductance EMPATICA_BLOOD_VOLUME_PULSE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device BLOOD_VOLUME_PULSE blood volume pulse EMPATICA_INTER_BEAT_INTERVAL RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device INTER_BEAT_INTERVAL inter beat interval EMPATICA_TAGS RAPIDS column 
Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TAGS tags","title":"Mandatory Empatica Format"},{"location":"datastreams/mandatory-empatica-format/#mandatory-empatica-format","text":"This is a description of the format RAPIDS needs to process data for the following Empatica sensors. EMPATICA_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration EMPATICA_HEARTRATE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate EMPATICA_TEMPERATURE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TEMPERATURE temperature EMPATICA_ELECTRODERMAL_ACTIVITY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device ELECTRODERMAL_ACTIVITY electrical conductance EMPATICA_BLOOD_VOLUME_PULSE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device BLOOD_VOLUME_PULSE blood volume pulse EMPATICA_INTER_BEAT_INTERVAL RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device INTER_BEAT_INTERVAL inter beat interval EMPATICA_TAGS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when 
a row of data was logged (automatically created by RAPIDS) DEVICE_ID A string that uniquely identifies a device TAGS tags","title":"Mandatory Empatica Format"},{"location":"datastreams/mandatory-fitbit-format/","text":"Mandatory Fitbit Format \u00b6 This is a description of the format RAPIDS needs to process data for the following Fitbit sensors. FITBIT_HEARTRATE_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE_DAILY_RESTINGHR Daily resting heartrate HEARTRATE_DAILY_CALORIESOUTOFRANGE Calories spent while heartrate was outside a heartrate zone HEARTRATE_DAILY_CALORIESFATBURN Calories spent while heartrate was inside the fat burn zone HEARTRATE_DAILY_CALORIESCARDIO Calories spent while heartrate was inside the cardio zone HEARTRATE_DAILY_CALORIESPEAK Calories spent while heartrate was inside the peak zone FITBIT_HEARTRATE_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate HEARTRATE_ZONE Heartrate zone that HEARTRATE belongs to. 
It is based on the heartrate zone ranges of each device FITBIT_SLEEP_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day LOCAL_START_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the start of a daily sleep episode LOCAL_END_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the end of a daily sleep episode DEVICE_ID A string that uniquely identifies a device EFFICIENCY Sleep efficiency computed by fitbit as time asleep / (total time in bed - time to fall asleep) MINUTES_AFTER_WAKEUP Minutes the participant spent in bed after waking up MINUTES_ASLEEP Minutes the participant was asleep MINUTES_AWAKE Minutes the participant was awake MINUTES_TO_FALL_ASLEEP Minutes the participant spent in bed before falling asleep MINUTES_IN_BED Minutes the participant spent in bed across the sleep episode IS_MAIN_SLEEP 0 if this episode is a nap, or 1 if it is a main sleep episode TYPE stages or classic sleep data FITBIT_SLEEP_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day DEVICE_ID A string that uniquely identifies a device TYPE_EPISODE_ID An id for each unique main or nap episode. 
Main and nap episodes have different levels, each row in this table is one of such levels, so multiple rows can have the same TYPE_EPISODE_ID DURATION Duration of the episode level in minutes IS_MAIN_SLEEP 0 if this episode level belongs to a nap, or 1 if it belongs to a main sleep episode TYPE type of level: stages or classic sleep data LEVEL For stages levels one of wake , deep , light , or rem . For classic levels one of awake , restless , and asleep FITBIT_STEPS_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Daily step count FITBIT_STEPS_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Intraday step count (usually every minute)","title":"Mandatory Fitbit Format"},{"location":"datastreams/mandatory-fitbit-format/#mandatory-fitbit-format","text":"This is a description of the format RAPIDS needs to process data for the following Fitbit sensors. 
FITBIT_HEARTRATE_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE_DAILY_RESTINGHR Daily resting heartrate HEARTRATE_DAILY_CALORIESOUTOFRANGE Calories spent while heartrate was outside a heartrate zone HEARTRATE_DAILY_CALORIESFATBURN Calories spent while heartrate was inside the fat burn zone HEARTRATE_DAILY_CALORIESCARDIO Calories spent while heartrate was inside the cardio zone HEARTRATE_DAILY_CALORIESPEAK Calories spent while heartrate was inside the peak zone FITBIT_HEARTRATE_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device HEARTRATE Intraday heartrate HEARTRATE_ZONE Heartrate zone that HEARTRATE belongs to. 
It is based on the heartrate zone ranges of each device FITBIT_SLEEP_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day LOCAL_START_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the start of a daily sleep episode LOCAL_END_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss representing the end of a daily sleep episode DEVICE_ID A string that uniquely identifies a device EFFICIENCY Sleep efficiency computed by fitbit as time asleep / (total time in bed - time to fall asleep) MINUTES_AFTER_WAKEUP Minutes the participant spent in bed after waking up MINUTES_ASLEEP Minutes the participant was asleep MINUTES_AWAKE Minutes the participant was awake MINUTES_TO_FALL_ASLEEP Minutes the participant spent in bed before falling asleep MINUTES_IN_BED Minutes the participant spent in bed across the sleep episode IS_MAIN_SLEEP 0 if this episode is a nap, or 1 if it is a main sleep episode TYPE stages or classic sleep data FITBIT_SLEEP_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss , this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day DEVICE_ID A string that uniquely identifies a device TYPE_EPISODE_ID An id for each unique main or nap episode. 
Main and nap episodes have different levels, each row in this table is one of such levels, so multiple rows can have the same TYPE_EPISODE_ID DURATION Duration of the episode level in minutes IS_MAIN_SLEEP 0 if this episode level belongs to a nap, or 1 if it belongs to a main sleep episode TYPE type of level: stages or classic sleep data LEVEL For stages levels one of wake , deep , light , or rem . For classic levels one of awake , restless , and asleep FITBIT_STEPS_SUMMARY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Daily step count FITBIT_STEPS_INTRADAY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) LOCAL_DATE_TIME Date time string with format yyyy-mm-dd hh:mm:ss DEVICE_ID A string that uniquely identifies a device STEPS Intraday step count (usually every minute)","title":"Mandatory Fitbit Format"},{"location":"datastreams/mandatory-phone-format/","text":"Mandatory Phone Format \u00b6 This is a description of the format RAPIDS needs to process data for the following PHONE sensors. 
See examples in the CSV files inside rapids_example_csv.zip PHONE_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration PHONE_ACTIVITY_RECOGNITION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device ACTIVITY_NAME A string that denotes current activity name: in_vehicle , on_bicycle , on_foot , still , unknown , tilting , walking or running ACTIVITY_TYPE An integer (ranged from 0 to 8) that denotes current activity type CONFIDENCE An integer (ranged from 0 to 100) that denotes the prediction accuracy PHONE_APPLICATIONS_CRASHES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name APPLICATION_VERSION Application\u2019s version code ERROR_SHORT Short description of the error ERROR_LONG More verbose version of the error description ERROR_CONDITION 1 = code error; 2 = non-responsive (ANR error) IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_FOREGROUND RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_NOTIFICATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name TEXT 
Notification\u2019s header text, not the content SOUND Notification\u2019s sound source (if applicable) VIBRATE Notification\u2019s vibration pattern (if applicable) DEFAULTS If notification was delivered according to device\u2019s default settings FLAGS An integer that denotes Android notification flag PHONE_BATTERY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BATTERY_STATUS An integer that denotes battery status: 0 or 1 = unknown, 2 = charging, 3 = discharging, 4 = not charging, 5 = full BATTERY_LEVEL An integer that denotes battery level, between 0 and BATTERY_SCALE BATTERY_SCALE An integer that denotes the maximum battery level PHONE_BLUETOOTH RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BT_ADDRESS MAC address of the device\u2019s Bluetooth sensor BT_NAME User assigned name of the device\u2019s Bluetooth sensor BT_RSSI The RSSI dB to the scanned device PHONE_CALLS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device CALL_TYPE An integer that denotes call type: 1 = incoming, 2 = outgoing, 3 = missed CALL_DURATION Length of the call session TRACE SHA-1 one-way source/target of the call PHONE_CONVERSATION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_ENERGY A number that denotes the amplitude of an audio sample (L2-norm of the audio frame) INFERENCE An integer (ranged from 0 to 3) that denotes the type of an audio sample: 0 = silence, 1 = noise, 2 = voice, 3 = unknown DOUBLE_CONVO_START UNIX timestamp (13 digits) of the beginning of a conversation DOUBLE_CONVO_END UNIX timestamp (13 digits) of the end of a conversation PHONE_KEYBOARD RAPIDS column 
Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME The application\u2019s package name of keyboard interaction BEFORE_TEXT The previous keyboard input (empty if password) CURRENT_TEXT The current keyboard input (empty if password) IS_PASSWORD An integer: 0 = not password; 1 = password PHONE_LIGHT RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LIGHT_LUX The ambient luminance in lux units ACCURACY An integer that denotes the sensor\u2019s accuracy level: 3 = maximum accuracy, 2 = medium accuracy, 1 = low accuracy PHONE_LOCATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LATITUDE The location\u2019s latitude, in degrees DOUBLE_LONGITUDE The location\u2019s longitude, in degrees DOUBLE_BEARING The location\u2019s bearing, in degrees DOUBLE_SPEED The speed if available, in meters/second over ground DOUBLE_ALTITUDE The altitude if available, in meters above sea level PROVIDER A string that denotes the provider: gps , fused or network ACCURACY The estimated location accuracy PHONE_LOG RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device LOG_MESSAGE A string that denotes log message PHONE_MESSAGES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MESSAGE_TYPE An integer that denotes message type: 1 = received, 2 = sent TRACE SHA-1 one-way source/target of the message PHONE_SCREEN RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SCREEN_STATUS An integer that denotes 
screen status: 0 = off, 1 = on, 2 = locked, 3 = unlocked PHONE_WIFI_CONNECTED RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MAC_ADDRESS Device\u2019s MAC address SSID Currently connected access point network name BSSID Currently connected access point MAC address PHONE_WIFI_VISIBLE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SSID Detected access point network name BSSID Detected access point MAC address SECURITY Active security protocols FREQUENCY Wi-Fi band frequency (e.g., 2427, 5180), in Hz RSSI RSSI dB to the scanned device","title":"Mandatory Phone Format"},{"location":"datastreams/mandatory-phone-format/#mandatory-phone-format","text":"This is a description of the format RAPIDS needs to process data for the following PHONE sensors. See examples in the CSV files inside rapids_example_csv.zip PHONE_ACCELEROMETER RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_VALUES_0 x axis of acceleration DOUBLE_VALUES_1 y axis of acceleration DOUBLE_VALUES_2 z axis of acceleration PHONE_ACTIVITY_RECOGNITION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device ACTIVITY_NAME A string that denotes current activity name: in_vehicle , on_bicycle , on_foot , still , unknown , tilting , walking or running ACTIVITY_TYPE An integer (ranged from 0 to 8) that denotes current activity type CONFIDENCE An integer (ranged from 0 to 100) that denotes the prediction accuracy PHONE_APPLICATIONS_CRASHES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package 
name APPLICATION_NAME Application\u2019s localized name APPLICATION_VERSION Application\u2019s version code ERROR_SHORT Short description of the error ERROR_LONG More verbose version of the error description ERROR_CONDITION 1 = code error; 2 = non-responsive (ANR error) IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_FOREGROUND RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name IS_SYSTEM_APP Device\u2019s pre-installed application PHONE_APPLICATIONS_NOTIFICATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME Application\u2019s package name APPLICATION_NAME Application\u2019s localized name TEXT Notification\u2019s header text, not the content SOUND Notification\u2019s sound source (if applicable) VIBRATE Notification\u2019s vibration pattern (if applicable) DEFAULTS If notification was delivered according to device\u2019s default settings FLAGS An integer that denotes Android notification flag PHONE_BATTERY RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BATTERY_STATUS An integer that denotes battery status: 0 or 1 = unknown, 2 = charging, 3 = discharging, 4 = not charging, 5 = full BATTERY_LEVEL An integer that denotes battery level, between 0 and BATTERY_SCALE BATTERY_SCALE An integer that denotes the maximum battery level PHONE_BLUETOOTH RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device BT_ADDRESS MAC address of the device\u2019s Bluetooth sensor BT_NAME User assigned name of the device\u2019s Bluetooth sensor BT_RSSI The RSSI dB to the 
scanned device PHONE_CALLS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device CALL_TYPE An integer that denotes call type: 1 = incoming, 2 = outgoing, 3 = missed CALL_DURATION Length of the call session TRACE SHA-1 one-way source/target of the call PHONE_CONVERSATION RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_ENERGY A number that denotes the amplitude of an audio sample (L2-norm of the audio frame) INFERENCE An integer (ranged from 0 to 3) that denotes the type of an audio sample: 0 = silence, 1 = noise, 2 = voice, 3 = unknown DOUBLE_CONVO_START UNIX timestamp (13 digits) of the beginning of a conversation DOUBLE_CONVO_END UNIX timestamp (13 digits) of the end of a conversation PHONE_KEYBOARD RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device PACKAGE_NAME The application\u2019s package name of keyboard interaction BEFORE_TEXT The previous keyboard input (empty if password) CURRENT_TEXT The current keyboard input (empty if password) IS_PASSWORD An integer: 0 = not password; 1 = password PHONE_LIGHT RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LIGHT_LUX The ambient luminance in lux units ACCURACY An integer that denotes the sensor\u2019s accuracy level: 3 = maximum accuracy, 2 = medium accuracy, 1 = low accuracy PHONE_LOCATIONS RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device DOUBLE_LATITUDE The location\u2019s latitude, in degrees DOUBLE_LONGITUDE The location\u2019s longitude, in degrees DOUBLE_BEARING The location\u2019s bearing, in degrees DOUBLE_SPEED 
The speed if available, in meters/second over ground DOUBLE_ALTITUDE The altitude if available, in meters above sea level PROVIDER A string that denotes the provider: gps , fused or network ACCURACY The estimated location accuracy PHONE_LOG RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device LOG_MESSAGE A string that denotes log message PHONE_MESSAGES RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MESSAGE_TYPE An integer that denotes message type: 1 = received, 2 = sent TRACE SHA-1 one-way source/target of the message PHONE_SCREEN RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SCREEN_STATUS An integer that denotes screen status: 0 = off, 1 = on, 2 = locked, 3 = unlocked PHONE_WIFI_CONNECTED RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device MAC_ADDRESS Device\u2019s MAC address SSID Currently connected access point network name BSSID Currently connected access point MAC address PHONE_WIFI_VISIBLE RAPIDS column Description TIMESTAMP An UNIX timestamp (13 digits) when a row of data was logged DEVICE_ID A string that uniquely identifies a device SSID Detected access point network name BSSID Detected access point MAC address SECURITY Active security protocols FREQUENCY Wi-Fi band frequency (e.g., 2427, 5180), in Hz RSSI RSSI dB to the scanned device","title":"Mandatory Phone Format"},{"location":"developers/documentation/","text":"Documentation \u00b6 We use mkdocs with the material theme to write these docs. Whenever you make any changes, just push them back to the repo and the documentation will be deployed automatically. 
Set up development environment \u00b6 Make sure your conda environment is active pip install mkdocs pip install mkdocs-material Preview \u00b6 Run the following command in RAPIDS root folder and go to http://127.0.0.1:8000 : mkdocs serve File Structure \u00b6 The documentation config file is /mkdocs.yml , if you are adding new .md files to the docs modify the nav attribute at the bottom of that file. You can use the hierarchy there to find all the files that appear in the documentation. Reference \u00b6 Check this page to get familiar with the different visual elements we can use in the docs (admonitions, code blocks, tables, etc.) You can also refer to /docs/setup/installation.md and /docs/setup/configuration.md to see practical examples of these elements. Hint Any links to internal pages should be relative to the current page. For example, any link from this page (documentation) which is inside ./developers should begin with ../ to go one folder level up like: [ mylink ]( ../setup/installation.md ) Extras \u00b6 You can insert emojis using this syntax :[SOURCE]-[ICON_NAME] from the following sources: https://materialdesignicons.com/ https://fontawesome.com/icons/tasks?style=solid https://primer.style/octicons/ You can use this page to create markdown tables more easily","title":"Documentation"},{"location":"developers/documentation/#documentation","text":"We use mkdocs with the material theme to write these docs. 
Whenever you make any changes, just push them back to the repo and the documentation will be deployed automatically.","title":"Documentation"},{"location":"developers/documentation/#set-up-development-environment","text":"Make sure your conda environment is active pip install mkdocs pip install mkdocs-material","title":"Set up development environment"},{"location":"developers/documentation/#preview","text":"Run the following command in RAPIDS root folder and go to http://127.0.0.1:8000 : mkdocs serve","title":"Preview"},{"location":"developers/documentation/#file-structure","text":"The documentation config file is /mkdocs.yml , if you are adding new .md files to the docs modify the nav attribute at the bottom of that file. You can use the hierarchy there to find all the files that appear in the documentation.","title":"File Structure"},{"location":"developers/documentation/#reference","text":"Check this page to get familiar with the different visual elements we can use in the docs (admonitions, code blocks, tables, etc.) You can also refer to /docs/setup/installation.md and /docs/setup/configuration.md to see practical examples of these elements. Hint Any links to internal pages should be relative to the current page. 
For example, any link from this page (documentation) which is inside ./developers should begin with ../ to go one folder level up like: [ mylink ]( ../setup/installation.md )","title":"Reference"},{"location":"developers/documentation/#extras","text":"You can insert emojis using this syntax :[SOURCE]-[ICON_NAME] from the following sources: https://materialdesignicons.com/ https://fontawesome.com/icons/tasks?style=solid https://primer.style/octicons/ You can use this page to create markdown tables more easily","title":"Extras"},{"location":"developers/git-flow/","text":"Git Flow \u00b6 We use the develop/master variation of the OneFlow git flow Add New Features \u00b6 We use feature (topic) branches to implement new features Internal Developer You are an internal developer if you have writing permissions to the repository. Most feature branches are never pushed to the repo, only do so if you expect that its development will take days (to avoid losing your work if your computer is damaged). Otherwise follow the following instructions to locally rebase your feature branch into develop and push those rebased changes online. Starting your feature branch Pull the latest develop git checkout develop git pull Create your feature branch git checkout -b feature/feature1 Add, modify or delete the necessary files to add your new feature Update the change log ( docs/change-log.md ) Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. 
Fetch the latest changes to develop git fetch origin develop Rebase your feature branch git checkout feature/feature1 git rebase -i develop Integrate your new feature to develop git checkout develop git merge --no-ff feature/feature1 # (use the default merge message) git push origin develop git branch -d feature/feature1 External Developer You are an external developer if you do NOT have writing permissions to the repository. Starting your feature branch Fork and clone our repository on Github Switch to the latest develop git checkout develop Create your feature branch git checkout -b feature/external-test Add, modify or delete the necessary files to add your new feature Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented, it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. 
Add our repo as another remote git remote add upstream https://github.com/carissalow/rapids/ Fetch the latest changes to develop git fetch upstream develop Rebase your feature branch git checkout feature/external-test git rebase -i develop Push your feature branch online git push --set-upstream origin feature/external-test Open a pull request to the develop branch using Github\u2019s GUI Release a New Version \u00b6 Pull the latest develop git checkout develop git pull Create a new release branch git describe --abbrev = 0 --tags # Bump the release (0.1.0 to 0.2.0 => NEW_RELEASE) git checkout -b release/v [ NEW_RELEASE ] develop Add new tag git tag v [ NEW_RELEASE ] Merge and push the release branch git checkout develop git merge release/v [ NEW_RELEASE ] git push --tags origin develop git branch -d release/v [ NEW_RELEASE ] Fast-forward master git checkout master git merge --ff-only develop git push Go to GitHub and create a new release based on the newest tag v[NEW_RELEASE] (remember to add the change log) Release a Hotfix \u00b6 Pull the latest master git checkout master git pull Start a hotfix branch git describe --abbrev = 0 --tags # Bump the hotfix (0.1.0 to 0.1.1 => NEW_HOTFIX) git checkout -b hotfix/v [ NEW_HOTFIX ] master Fix whatever needs to be fixed Update the change log Tag and merge the hotfix git tag v [ NEW_HOTFIX ] git checkout develop git merge hotfix/v [ NEW_HOTFIX ] git push --tags origin develop git branch -d hotfix/v [ NEW_HOTFIX ] Fast-forward master git checkout master git merge --ff-only v[NEW_HOTFIX] git push Go to GitHub and create a new release based on the newest tag v[NEW_HOTFIX] (remember to add the change log)","title":"Git Flow"},{"location":"developers/git-flow/#git-flow","text":"We use the develop/master variation of the OneFlow git flow","title":"Git Flow"},{"location":"developers/git-flow/#add-new-features","text":"We use feature (topic) branches to implement new features Internal Developer You are an internal developer if you 
have writing permissions to the repository. Most feature branches are never pushed to the repo, only do so if you expect that its development will take days (to avoid losing your work if your computer is damaged). Otherwise follow the following instructions to locally rebase your feature branch into develop and push those rebased changes online. Starting your feature branch Pull the latest develop git checkout develop git pull Create your feature branch git checkout -b feature/feature1 Add, modify or delete the necessary files to add your new feature Update the change log ( docs/change-log.md ) Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. Fetch the latest changes to develop git fetch origin develop Rebase your feature branch git checkout feature/feature1 git rebase -i develop Integrate your new feature to develop git checkout develop git merge --no-ff feature/feature1 # (use the default merge message) git push origin develop git branch -d feature/feature1 External Developer You are an external developer if you do NOT have writing permissions to the repository. 
Starting your feature branch Fork and clone our repository on Github Switch to the latest develop git checkout develop Create your feature branch git checkout -b feature/external-test Add, modify or delete the necessary files to add your new feature Stage and commit your changes using VS Code git GUI or the following commands git add modified-file1 modified-file2 git commit -m \"Add my new feature\" # use a concise description Merging back your feature branch If your changes took time to be implemented, it is possible that there are new commits in our develop branch, so we need to rebase your feature branch. Add our repo as another remote git remote add upstream https://github.com/carissalow/rapids/ Fetch the latest changes to develop git fetch upstream develop Rebase your feature branch git checkout feature/external-test git rebase -i develop Push your feature branch online git push --set-upstream origin feature/external-test Open a pull request to the develop branch using Github\u2019s GUI","title":"Add New Features"},{"location":"developers/git-flow/#release-a-new-version","text":"Pull the latest develop git checkout develop git pull Create a new release branch git describe --abbrev = 0 --tags # Bump the release (0.1.0 to 0.2.0 => NEW_RELEASE) git checkout -b release/v [ NEW_RELEASE ] develop Add new tag git tag v [ NEW_RELEASE ] Merge and push the release branch git checkout develop git merge release/v [ NEW_RELEASE ] git push --tags origin develop git branch -d release/v [ NEW_RELEASE ] Fast-forward master git checkout master git merge --ff-only develop git push Go to GitHub and create a new release based on the newest tag v[NEW_RELEASE] (remember to add the change log)","title":"Release a New Version"},{"location":"developers/git-flow/#release-a-hotfix","text":"Pull the latest master git checkout master git pull Start a hotfix branch git describe --abbrev = 0 --tags # Bump the hotfix (0.1.0 to 0.1.1 => NEW_HOTFIX) git checkout -b hotfix/v [ NEW_HOTFIX ] master 
Fix whatever needs to be fixed Update the change log Tag and merge the hotfix git tag v [ NEW_HOTFIX ] git checkout develop git merge hotfix/v [ NEW_HOTFIX ] git push --tags origin develop git branch -d hotfix/v [ NEW_HOTFIX ] Fast-forward master git checkout master git merge --ff-only v[NEW_HOTFIX] git push Go to GitHub and create a new release based on the newest tag v[NEW_HOTFIX] (remember to add the change log)","title":"Release a Hotfix"},{"location":"developers/remote-support/","text":"Remote Support \u00b6 We use the Live Share extension of Visual Studio Code to debug bugs when sharing data or database credentials is not possible. Install Visual Studio Code Open your RAPIDS root folder in a new VSCode window Open a new Terminal Terminal > New terminal Install the Live Share extension pack Press Ctrl + P or Cmd + P and run this command: >live share: start collaboration session 6. Follow the instructions and share the session link you receive","title":"Remote Support"},{"location":"developers/remote-support/#remote-support","text":"We use the Live Share extension of Visual Studio Code to debug bugs when sharing data or database credentials is not possible. Install Visual Studio Code Open your RAPIDS root folder in a new VSCode window Open a new Terminal Terminal > New terminal Install the Live Share extension pack Press Ctrl + P or Cmd + P and run this command: >live share: start collaboration session 6. Follow the instructions and share the session link you receive","title":"Remote Support"},{"location":"developers/test-cases/","text":"Test Cases \u00b6 Along with the continued development and the addition of new sensors and features to the RAPIDS pipeline, tests for the currently available sensors and features are being implemented. Since this is a Work In Progress this page will be updated with the list of sensors and features for which testing is available. For each of the sensors listed a description of the data used for testing (test cases) is outlined. 
Currently for all intent and testing purposes the tests/data/raw/test01/ contains all the test data files for testing android data formats and tests/data/raw/test02/ contains all the test data files for testing iOS data formats. It follows that the expected (verified output) are contained in the tests/data/processed/test01/ and tests/data/processed/test02/ for Android and iOS respectively. tests/data/raw/test03/ and tests/data/raw/test04/ contain data files for testing empty raw data files for android and iOS respectively. The following is a list of the sensors for which testing is currently available. Sensor Provider Periodic Frequency Event Phone Accelerometer Panda N N N Phone Accelerometer RAPIDS N N N Phone Activity Recognition RAPIDS N N N Phone Applications Foreground RAPIDS N N N Phone Battery RAPIDS Y Y N Phone Bluetooth Doryab N N N Phone Bluetooth RAPIDS Y Y Y Phone Calls RAPIDS Y Y N Phone Conversation RAPIDS Y Y N Phone Data Yield RAPIDS N N N Phone Light RAPIDS Y Y N Phone Locations Doryab N N N Phone Locations Barnett N N N Phone Messages RAPIDS Y Y N Phone Screen RAPIDS Y N N Phone WiFi Connected RAPIDS Y Y N Phone WiFi Visible RAPIDS Y Y N Fitbit Data Yield RAPIDS N N N Fitbit Heart Rate Summary RAPIDS N N N Fitbit Heart Rate Intraday RAPIDS N N N Fitbit Sleep Summary RAPIDS N N N Fitbit Steps Summary RAPIDS N N N Fitbit Steps Intraday RAPIDS N N N Messages (SMS) \u00b6 The raw message data file contains data for 2 separate days. The data for the first day contains 5 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The raw message data contains records for both message_types (i.e. received and sent ) in both days in all epochs. The number of records with each message_types per epoch is randomly distributed There is at least one record with each message_types per epoch. There is one raw message data file each, as described above, for testing both iOS and Android data. 
There is also an additional empty data file for both android and iOS for testing empty data files Calls \u00b6 Due to the difference in the format of the raw call data for iOS and Android the following is the expected results the calls_with_datetime_unified.csv . This would give a better idea of the use cases being tested since the calls_with_datetime_unified.csv would make both the iOS and Android data comparable. The call data would contain data for 2 days. The data for the first day contains 6 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The call data contains records for all call_types (i.e. incoming , outgoing and missed ) in both days in all epochs. The number of records with each of the call_types per epoch is randomly distributed. There is at least one record with each call_types per epoch. There is one call data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files Screen \u00b6 Due to the difference in the format of the raw screen data for iOS and Android the following is the expected results the screen_deltas.csv . This would give a better idea of the use cases being tested since the screen_deltas.csv would make both the iOS and Android data comparable These files are used to calculate the features for the screen sensor The screen delta data file contains data for 1 day. The screen delta data contains 1 record to represent an unlock episode that falls within an epoch for every epoch . The screen delta data contains 1 record to represent an unlock episode that falls across the boundary of 2 epochs. Namely the unlock episode starts in one epoch and ends in the next, thus there is a record for unlock episodes that fall across night to morning , morning to afternoon and finally afternoon to night The testing is done for unlock episode_type. 
There is one screen data file each for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files Battery \u00b6 Due to the difference in the format of the raw battery data for iOS and Android as well as versions of iOS the following is the expected results the battery_deltas.csv . This would give a better idea of the use cases being tested since the battery_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the battery sensor. The battery delta data file contains data for 1 day. The battery delta data contains 1 record each for a charging and discharging episode that falls within an epoch for every epoch . Thus, for the daily epoch there would be multiple charging and discharging episodes Since either a charging episode or a discharging episode and not both can occur across epochs, in order to test episodes that occur across epochs alternating episodes of charging and discharging episodes that fall across night to morning , morning to afternoon and finally afternoon to night are present in the battery delta data. This starts with a discharging episode that begins in night and end in morning . There is one battery data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files Bluetooth \u00b6 The raw Bluetooth data file contains data for 1 day. The raw Bluetooth data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) An option of 5 Bluetooth devices are randomly distributed throughout the data records. 
There is one raw Bluetooth data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files. WIFI \u00b6 There are 2 data files ( wifi_raw.csv and sensor_wifi_raw.csv ) for each fake participant for each phone platform. The raw WIFI data files contain data for 1 day. The sensor_wifi_raw.csv data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) The wifi_raw.csv data contains 3 records with random timestamps for each epoch to represent visible broadcasting WIFI network. This file is empty for the iOS phone testing data. An option of 10 access point devices is randomly distributed throughout the data records. 5 each for sensor_wifi_raw.csv and wifi_raw.csv . There data files for testing both iOS and Android data formats. There are also additional empty data files for both android and iOS for testing empty data files. Light \u00b6 The raw light data file contains data for 1 day. The raw light data contains 3 or 4 rows of data for each epoch except night . The single row of data for night is for testing features for single values inputs. (Example testing the standard deviation of one input value) Since light is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files. Application Foreground \u00b6 The raw application foreground data file contains data for 1 day. The raw application foreground data contains 7 - 9 rows of data for each epoch . 
The records for each epoch contain apps that are randomly selected from a list of apps that are from the MULTIPLE_CATEGORIES and SINGLE_CATEGORIES (See testing_config.yaml ). There are also records in each epoch that have apps randomly selected from a list of apps that are from the EXCLUDED_CATEGORIES and EXCLUDED_APPS . This is to test that these apps are actually being excluded from the calculations of features. There are also records to test SINGLE_APPS calculations. Since application foreground is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files. Activity Recognition \u00b6 The raw Activity Recognition data file contains data for 1 day. The raw Activity Recognition data each epoch period contains rows that record 2 - 5 different activity_types . This is such that durations of activities can be tested. Additionally, there are records that mimic the duration of an activity over the time boundary of neighboring epochs. (For example, there is a set of records that mimic the participant in_vehicle from afternoon into evening ) There is one file each with raw Activity Recognition data for testing both iOS and Android data formats. (plugin_google_activity_recognition_raw.csv for android and plugin_ios_activity_recognition_raw.csv for iOS) There is also an additional empty data file for both android and iOS for testing empty data files. Conversation \u00b6 The raw conversation data file contains data for 2 days. The raw conversation data contains records with a sample of both datatypes (i.e. voice/noise = 0 , and conversation = 2 ) as well as rows with samples of each of the inference values (i.e. silence = 0 , noise = 1 , voice = 2 , and unknown = 3 ) for each epoch . The different datatype and inference records are randomly distributed throughout the epoch . 
Additionally there are 2 - 5 records for conversations ( datatype = 2, and inference = -1) in each epoch and for each epoch except night, there is a conversation record that has a double_convo_start timestamp that is from the previous epoch . This is to test the calculations of features across epochs . There is a raw conversation data file for both android and iOS platforms ( plugin_studentlife_audio_android_raw.csv and plugin_studentlife_audio_raw.csv respectively). Finally, there are also additional empty data files for both android and iOS for testing empty data files","title":"Test cases"},{"location":"developers/test-cases/#test-cases","text":"Along with the continued development and the addition of new sensors and features to the RAPIDS pipeline, tests for the currently available sensors and features are being implemented. Since this is a Work In Progress this page will be updated with the list of sensors and features for which testing is available. For each of the sensors listed a description of the data used for testing (test cases) are outline. Currently for all intent and testing purposes the tests/data/raw/test01/ contains all the test data files for testing android data formats and tests/data/raw/test02/ contains all the test data files for testing iOS data formats. It follows that the expected (verified output) are contained in the tests/data/processed/test01/ and tests/data/processed/test02/ for Android and iOS respectively. tests/data/raw/test03/ and tests/data/raw/test04/ contain data files for testing empty raw data files for android and iOS respectively. The following is a list of the sensors that testing is currently available. 
Sensor Provider Periodic Frequency Event Phone Accelerometer Panda N N N Phone Accelerometer RAPIDS N N N Phone Activity Recognition RAPIDS N N N Phone Applications Foreground RAPIDS N N N Phone Battery RAPIDS Y Y N Phone Bluetooth Doryab N N N Phone Bluetooth RAPIDS Y Y Y Phone Calls RAPIDS Y Y N Phone Conversation RAPIDS Y Y N Phone Data Yield RAPIDS N N N Phone Light RAPIDS Y Y N Phone Locations Doryab N N N Phone Locations Barnett N N N Phone Messages RAPIDS Y Y N Phone Screen RAPIDS Y N N Phone WiFi Connected RAPIDS Y Y N Phone WiFi Visible RAPIDS Y Y N Fitbit Data Yield RAPIDS N N N Fitbit Heart Rate Summary RAPIDS N N N Fitbit Heart Rate Intraday RAPIDS N N N Fitbit Sleep Summary RAPIDS N N N Fitbit Steps Summary RAPIDS N N N Fitbit Steps Intraday RAPIDS N N N","title":"Test Cases"},{"location":"developers/test-cases/#messages-sms","text":"The raw message data file contains data for 2 separate days. The data for the first day contains 5 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The raw message data contains records for both message_types (i.e. received and sent ) in both days in all epochs. The number of records with each message_types per epoch is randomly distributed There is at least one record with each message_types per epoch. There is one raw message data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Messages (SMS)"},{"location":"developers/test-cases/#calls","text":"Due to the difference in the format of the raw call data for iOS and Android the following is the expected results the calls_with_datetime_unified.csv . This would give a better idea of the use cases being tested since the calls_with_datetime_unified.csv would make both the iOS and Android data comparable. The call data would contain data for 2 days. 
The data for the first day contains 6 records for every epoch . The second day's data contains 6 records for each of only 2 epochs (currently morning and evening ) The call data contains records for all call_types (i.e. incoming , outgoing and missed ) in both days in all epochs. The number of records with each of the call_types per epoch is randomly distributed. There is at least one record with each call_types per epoch. There is one call data file each, as described above, for testing both iOS and Android data. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Calls"},{"location":"developers/test-cases/#screen","text":"Due to the difference in the format of the raw screen data for iOS and Android the following is the expected results the screen_deltas.csv . This would give a better idea of the use cases being tested since the screen_deltas.csv would make both the iOS and Android data comparable These files are used to calculate the features for the screen sensor The screen delta data file contains data for 1 day. The screen delta data contains 1 record to represent an unlock episode that falls within an epoch for every epoch . The screen delta data contains 1 record to represent an unlock episode that falls across the boundary of 2 epochs. Namely the unlock episode starts in one epoch and ends in the next, thus there is a record for unlock episodes that fall across night to morning , morning to afternoon and finally afternoon to night The testing is done for unlock episode_type. There is one screen data file each for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Screen"},{"location":"developers/test-cases/#battery","text":"Due to the difference in the format of the raw battery data for iOS and Android as well as versions of iOS the following is the expected results the battery_deltas.csv . 
This would give a better idea of the use cases being tested since the battery_deltas.csv would make both the iOS and Android data comparable. These files are used to calculate the features for the battery sensor. The battery delta data file contains data for 1 day. The battery delta data contains 1 record each for a charging and discharging episode that falls within an epoch for every epoch . Thus, for the daily epoch there would be multiple charging and discharging episodes Since either a charging episode or a discharging episode and not both can occur across epochs, in order to test episodes that occur across epochs alternating episodes of charging and discharging episodes that fall across night to morning , morning to afternoon and finally afternoon to night are present in the battery delta data. This starts with a discharging episode that begins in night and end in morning . There is one battery data file each, for testing both iOS and Android data formats. There is also an additional empty data file for both android and iOS for testing empty data files","title":"Battery"},{"location":"developers/test-cases/#bluetooth","text":"The raw Bluetooth data file contains data for 1 day. The raw Bluetooth data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) An option of 5 Bluetooth devices are randomly distributed throughout the data records. There is one raw Bluetooth data file each, for testing both iOS and Android data formats. 
There is also an additional empty data file for both android and iOS for testing empty data files.","title":"Bluetooth"},{"location":"developers/test-cases/#wifi","text":"There are 2 data files ( wifi_raw.csv and sensor_wifi_raw.csv ) for each fake participant for each phone platform. The raw WIFI data files contain data for 1 day. The sensor_wifi_raw.csv data contains at least 2 records for each epoch . Each epoch has a record with a timestamp for the beginning boundary for that epoch and a record with a timestamp for the ending boundary for that epoch . (e.g. For the morning epoch there is a record with a timestamp for 6:00AM and another record with a timestamp for 11:59:59AM . These are to test edge cases) The wifi_raw.csv data contains 3 records with random timestamps for each epoch to represent visible broadcasting WIFI network. This file is empty for the iOS phone testing data. An option of 10 access point devices is randomly distributed throughout the data records. 5 each for sensor_wifi_raw.csv and wifi_raw.csv . There data files for testing both iOS and Android data formats. There are also additional empty data files for both android and iOS for testing empty data files.","title":"WIFI"},{"location":"developers/test-cases/#light","text":"The raw light data file contains data for 1 day. The raw light data contains 3 or 4 rows of data for each epoch except night . The single row of data for night is for testing features for single values inputs. (Example testing the standard deviation of one input value) Since light is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files.","title":"Light"},{"location":"developers/test-cases/#application-foreground","text":"The raw application foreground data file contains data for 1 day. The raw application foreground data contains 7 - 9 rows of data for each epoch . 
The records for each epoch contain apps that are randomly selected from a list of apps that are from the MULTIPLE_CATEGORIES and SINGLE_CATEGORIES (See testing_config.yaml ). There are also records in each epoch that have apps randomly selected from a list of apps that are from the EXCLUDED_CATEGORIES and EXCLUDED_APPS . This is to test that these apps are actually being excluded from the calculations of features. There are also records to test SINGLE_APPS calculations. Since application foreground is only available for Android there is only one file that contains data for Android. All other files (i.e. for iPhone) are empty data files.","title":"Application Foreground"},{"location":"developers/test-cases/#activity-recognition","text":"The raw Activity Recognition data file contains data for 1 day. The raw Activity Recognition data each epoch period contains rows that record 2 - 5 different activity_types . This is such that durations of activities can be tested. Additionally, there are records that mimic the duration of an activity over the time boundary of neighboring epochs. (For example, there is a set of records that mimic the participant in_vehicle from afternoon into evening ) There is one file each with raw Activity Recognition data for testing both iOS and Android data formats. (plugin_google_activity_recognition_raw.csv for android and plugin_ios_activity_recognition_raw.csv for iOS) There is also an additional empty data file for both android and iOS for testing empty data files.","title":"Activity Recognition"},{"location":"developers/test-cases/#conversation","text":"The raw conversation data file contains data for 2 days. The raw conversation data contains records with a sample of both datatypes (i.e. voice/noise = 0 , and conversation = 2 ) as well as rows with samples of each of the inference values (i.e. silence = 0 , noise = 1 , voice = 2 , and unknown = 3 ) for each epoch . 
The different datatype and inference records are randomly distributed throughout the epoch . Additionally there are 2 - 5 records for conversations ( datatype = 2, and inference = -1) in each epoch and for each epoch except night, there is a conversation record that has a double_convo_start timestamp that is from the previous epoch . This is to test the calculations of features across epochs . There is a raw conversation data file for both android and iOS platforms ( plugin_studentlife_audio_android_raw.csv and plugin_studentlife_audio_raw.csv respectively). Finally, there are also additional empty data files for both android and iOS for testing empty data files","title":"Conversation"},{"location":"developers/testing/","text":"Testing \u00b6 The following is a simple guide to run RAPIDS\u2019 tests. All files necessary for testing are stored in the ./tests/ directory Steps for Testing \u00b6 Add raw data. Add the raw data to the corresponding sensor CSV file in tests/data/external/aware_csv . Create the CSV if it does not exist. Link raw data. Make sure that you link the new raw data to a participant by using the same device_id in the data and in [DEVICE_IDS] inside their participant file ( tests/data/external/participant_files/testXX.yaml ). Create the participant file if it does not exist, and don\u2019t forget to edit [PIDS] in the config file of the time segments you are testing (see below). For simplicity, we use a participant\u2019s id ( testXX ) as their device_id . Edit the config file. Activate the sensor provider you are testing if it isn\u2019t already. Set [SENSOR][PROVIDER][COMPUTE] to TRUE in the config.yaml of the time segments you are testing: - tests/settings/frequency_config.yaml # For frequency time segments - tests/settings/periodic_config.yaml # For periodic time segments # We have not tested events time segments yet Run the pipeline and tests. 
You can run all time segments pipelines and their tests tests/scripts/run_tests.sh -t all You can run only the pipeline of a specific time segment and its tests tests/scripts/run_tests.sh -t frequency -a both Or, if you are working on your tests and you want to run a pipeline and its tests independently tests/scripts/run_tests.sh -t frequency -a run tests/scripts/run_tests.sh -t frequency -a test Output example \u00b6 The following is a snippet of the output you should see after running your test. test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... frequency ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... frequency FAIL The results above show that for periodic both test_sensors_files_exist and test_sensors_features_calculations passed, while for frequency the first test test_sensors_files_exist passed but test_sensors_features_calculations failed. Additionally, you should get the traceback of the failure (not shown here). For more information on how to implement test scripts and use unittest please see Unittest Documentation Testing of the RAPIDS sensors and features is a work-in-progress. Please see Test Cases for a list of sensors and features that have testing currently available. How do we execute the tests? \u00b6 This bash script tests/scripts/run_tests.sh executes one or all pipelines for different time segment types ( frequency , periodic , and events ) as well as their tests (see below). This python script tests/scripts/run_tests.py runs the tests. It parses the involved participants and active sensor providers in the config.yaml file of the time segment type being tested. We test that the output file we expect exists and that its content matches the expected values. 
Example of raw data for PHONE_APPLICATIONS_FOREGROUND testing 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 \"timestamp\" , \"device_id\" , \"package_name\" , \"application_name\" , \"is_system_app\" 1593946320761 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593961974942 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593958144033 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593947228964 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593951572326 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593950554868 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593964799620 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593974241305 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593969483540 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593977289581 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593970763367 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593964867720 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593974942995 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593986399351 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1594000139073 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593994717099 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593985854872 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1594003154390 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1594003853415 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593991680045 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 
0 1594007999202 , \"test01\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1593939733998 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593933324739 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593925161482 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593936918763 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593924155524 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1593922625358 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593943199317 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593951550550 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593981544544 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593999779779 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593933565565 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475200000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602475200000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602475999000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475999000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602477000000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602477000000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602477000000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478000000 , \"test05\" , 
\"com.facebook.moments\" , \"Facebook Moments\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602480500000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube 
Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559800000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602559800000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602561599000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602561599000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563400000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563500000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0","title":"Testing"},{"location":"developers/testing/#testing","text":"The following is a simple guide to run RAPIDS\u2019 tests. All files necessary for testing are stored in the ./tests/ directory","title":"Testing"},{"location":"developers/testing/#steps-for-testing","text":"Add raw data. Add the raw data to the corresponding sensor CSV file in tests/data/external/aware_csv . Create the CSV if it does not exist. Link raw data. Make sure that you link the new raw data to a participant by using the same device_id in the data and in [DEVICE_IDS] inside their participant file ( tests/data/external/participant_files/testXX.yaml ). Create the participant file if it does not exist, and don\u2019t forget to edit [PIDS] in the config file of the time segments you are testing (see below). For simplicity, we use a participant\u2019s id ( testXX ) as their device_id . 
Edit the config file. Activate the sensor provider you are testing if it isn\u2019t already. Set [SENSOR][PROVIDER][COMPUTE] to TRUE in the config.yaml of the time segments you are testing: - tests/settings/frequency_config.yaml # For frequency time segments - tests/settings/periodic_config.yaml # For periodic time segments # We have not tested events time segments yet Run the pipeline and tests. You can run all time segments pipelines and their tests tests/scripts/run_tests.sh -t all You can run only the pipeline of a specific time segment and its tests tests/scripts/run_tests.sh -t frequency -a both Or, if you are working on your tests and you want to run a pipeline and its tests independently tests/scripts/run_tests.sh -t frequency -a run tests/scripts/run_tests.sh -t frequency -a test","title":"Steps for Testing"},{"location":"developers/testing/#output-example","text":"The following is a snippet of the output you should see after running your test. test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... periodic ok test_sensors_files_exist ( test_sensor_features.TestSensorFeatures ) ... frequency ok test_sensors_features_calculations ( test_sensor_features.TestSensorFeatures ) ... frequency FAIL The results above show that for periodic both test_sensors_files_exist and test_sensors_features_calculations passed, while for frequency the first test test_sensors_files_exist passed but test_sensors_features_calculations failed. Additionally, you should get the traceback of the failure (not shown here). For more information on how to implement test scripts and use unittest please see Unittest Documentation Testing of the RAPIDS sensors and features is a work-in-progress. 
Please see Test Cases for a list of sensors and features that have testing currently available.","title":"Output example"},{"location":"developers/testing/#how-do-we-execute-the-tests","text":"This bash script tests/scripts/run_tests.sh executes one or all pipelines for different time segment types ( frequency , periodic , and events ) as well as their tests (see below). This python script tests/scripts/run_tests.py runs the tests. It parses the involved participants and active sensor providers in the config.yaml file of the time segment type being tested. We test that the output file we expect exists and that its content matches the expected values. Example of raw data for PHONE_APPLICATIONS_FOREGROUND testing 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 \"timestamp\" , \"device_id\" , \"package_name\" , \"application_name\" , \"is_system_app\" 1593946320761 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593961974942 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593958144033 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593947228964 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593951572326 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593950554868 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1593964799620 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593974241305 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593969483540 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593977289581 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593970763367 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1593964867720 , \"test01\" , \"com.supercell.clashofclans\" , \"Clash of 
Clans\" , 0 1593974942995 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593986399351 , \"test01\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1594000139073 , \"test01\" , \"com.google.android.gm\" , \"Gmail\" , 0 1593994717099 , \"test01\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1593985854872 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1594003154390 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1594003853415 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593991680045 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1594007999202 , \"test01\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1593939733998 , \"test01\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1593933324739 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593925161482 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593936918763 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593924155524 , \"test01\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1593922625358 , \"test01\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1593943199317 , \"test01\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1593951550550 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593981544544 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593999779779 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1593933565565 , \"test01\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475200000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602475200000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602475200000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602475999000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602475999000 , \"test05\" , 
\"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602476999000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602476999000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602477000000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602477000000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602477000000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478000000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478000000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478799000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478799000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602478799000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602478800000 , \"test05\" , \"com.google.android.gm\" , \"Gmail\" , 0 1602478800000 , \"test05\" , \"com.fitbit.FitbitMobile\" , \"Fitbit\" , 0 1602480500000 , \"test05\" , \"com.netflix.mediaclient\" , \"Netflix\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602480500000 , \"test05\" , \"com.supercell.clashofclans\" , \"Clash of Clans\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , 
\"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558000000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602558799000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602558799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559799000 , \"test05\" , \"com.aware.plugin.upmc.cancer\" , \"AWARE\" , 0 1602559799000 , \"test05\" , \"com.google.android.apps.youtube.creator\" , \"Youtube Video Creater\" , 0 1602559800000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602559800000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602560800000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602560800000 , \"test05\" , \"com.google.android.youtube\" , \"Youtube\" , 0 1602561599000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0 1602561599000 , \"test05\" , \"tv.twitch.android.app\" , \"Twitch\" , 0 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602561600000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563400000 , \"test05\" , \"com.google.android.gms\" , \"Google\" , 1 1602563500000 , \"test05\" , \"com.facebook.moments\" , \"Facebook Moments\" , 0","title":"How do we execute the tests?"},{"location":"developers/validation-schema-config/","text":"Validation schema of config.yaml \u00b6 Why do we need to validate the config.yaml ? 
Most of the key/values in the config.yaml are constrained to a set of possible values or types. For example [TIME_SEGMENTS][TYPE] can only be one of [\"FREQUENCY\", \"PERIODIC\", \"EVENT\"] , and [TIMEZONE] has to be a string. We should show the user an error if that\u2019s not the case. We could validate this in Python or R but since we reuse scripts and keys in multiple places, tracking these validations can be time consuming and get out of control. Thus, we do these validations through a schema and check that schema before RAPIDS starts processing any data so the user can see the error right away. Keep in mind these validations can only cover certain base cases. Some validations that require more complex logic should still be done in the respective script. For example, we can check that a CSV file path actually ends in .csv but we can only check that the file actually exists in a Python script. The structure and values of the config.yaml file are validated using a YAML schema stored in tools/config.schema.yaml . Each key in config.yaml , for example PIDS , has a corresponding entry in the schema where we can validate its type, possible values, required properties, min and max values, among other things. The config.yaml is validated against the schema every time RAPIDS runs (see the top of the Snakefile ): validate ( config , \"tools/config.schema.yaml\" ) Structure of the schema \u00b6 The schema has three main sections required , definitions , and properties . All of them are just nested key/value YAML pairs, where the value can be a primitive type ( integer , string , boolean , number ) or can be another key/value pair ( object ). required \u00b6 required lists properties that should be present in the config.yaml . We will almost always add every config.yaml key to this list (meaning that the user cannot delete any of those keys like TIMEZONE or PIDS ). 
definitions \u00b6 definitions lists key/values that are common to different properties so we can reuse them. You can define a key/value under definitions and use $ref to refer to it in any property . For example, every sensor like [PHONE_ACCELEROMETER] has one or more providers like RAPIDS and PANDA , these providers have some common properties like the COMPUTE flag or the SRC_SCRIPT string. Therefore we define a shared provider \u201ctemplate\u201d that is used by every provider and extended with properties exclusive to each one of them. For example: provider definition (template) The PROVIDER definition will be used later on different properties . PROVIDER : type : object required : [ COMPUTE , SRC_SCRIPT , FEATURES ] properties : COMPUTE : type : boolean FEATURES : type : [ array , object ] SRC_SCRIPT : type : string pattern : \"^.*\\\\.(py|R)$\" provider reusing and extending the template Notice that RAPIDS (a provider) uses and extends the PROVIDER template in this example. The FEATURES key is overriding the FEATURES key from the #/definitions/PROVIDER template but is keeping the validation for COMPUTE , and SRC_SCRIPT . For more details about reusing properties, go to this link PHONE_ACCELEROMETER : type : object # .. other properties PROVIDERS : type : [ \"null\" , object ] properties : RAPIDS : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : FEATURES : type : array uniqueItems : True items : type : string enum : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] properties \u00b6 properties are nested key/values that describe the different components of our config.yaml file. Values can be of one or more primitive types like string , number , array , boolean and null . Values can also be another key/value pair (of type object ) that are similar to a dictionary in Python. For example, the following property validates the PIDS of our config.yaml . 
It checks that PIDS is an array with unique items of type string . PIDS : type : array uniqueItems : True items : type : string Modifying the schema \u00b6 Validating the config.yaml during development If you updated the schema and want to check the config.yaml is compliant, you can run the command snakemake --list-params-changes . You will see Building DAG of jobs... if there are no problems or an error message otherwise (try setting any COMPUTE flag to a string like test instead of False/True ). You can use this command without having to configure RAPIDS to process any participants or sensors. You can validate different aspects of each key/value in our config.yaml file: number/integer Including min and max values MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS : type : number minimum : 0 maximum : 1 FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD : type : integer exclusiveMinimum : 0 string Including valid values ( enum ) items : type : string enum : [ \"count\" , \"maxlux\" , \"minlux\" , \"avglux\" , \"medianlux\" , \"stdlux\" ] boolean MINUTES_DATA_USED : type : boolean array Including whether or not it should have unique values, the type of the array\u2019s elements ( strings , numbers ) and valid values ( enum ). MESSAGES_TYPES : type : array uniqueItems : True items : type : string enum : [ \"received\" , \"sent\" ] object PARENT is an object that has two properties. KID1 is one of those properties that are, in turn, another object that will reuse the \"#/definitions/PROVIDER\" definition AND also include (extend) two extra properties GRAND_KID1 of type array and GRAND_KID2 of type number . KID2 is another property of PARENT of type boolean . 
The schema validation looks like this PARENT : type : object properties : KID1 : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : GRAND_KID1 : type : array uniqueItems : True GRAND_KID2 : type : number KID2 : type : boolean The config.yaml key that the previous schema validates looks like this: PARENT : KID1 : # These three come from the `PROVIDER` definition (template) COMPUTE : False FEATURES : [ x , y ] # an array SRC_SCRIPT : \"a path to a py or R script\" # These two come from the extension GRAND_KID1 : [ a , b ] # an array GRAND_KID2 : 5.1 # a number KID2 : True # a boolean Verifying the schema is correct \u00b6 We recommend that before you start modifying the schema you modify the config.yaml key that you want to validate with an invalid value. For example, if you want to validate that COMPUTE is boolean, you set COMPUTE: 123 . Then create your validation, run snakemake --list-params-changes and make sure your validation fails (123 is not boolean ), and then set the key to the correct value. In other words, make sure it\u2019s broken first so that you know that your validation works. Warning Be careful . You can check that the schema config.schema.yaml has a valid format by running python tools/check_schema.py . You will see this message if its structure is correct: Schema is OK . However, we don\u2019t have a way to detect typos, for example allOf will work but allOF won\u2019t (capital F ) and it won\u2019t show any error. That\u2019s why we recommend starting with an invalid key/value in your config.yaml so that you can be sure the schema validation finds the problem. Useful resources \u00b6 Read the following links to learn more about what we can validate with schemas. They are based on JSON instead of YAML schemas but the same concepts apply. 
Understanding JSON Schemas Specification of the JSON schema we use","title":"Validation schema of config.yaml"},{"location":"developers/validation-schema-config/#validation-schema-of-configyaml","text":"Why do we need to validate the config.yaml ? Most of the key/values in the config.yaml are constrained to a set of possible values or types. For example [TIME_SEGMENTS][TYPE] can only be one of [\"FREQUENCY\", \"PERIODIC\", \"EVENT\"] , and [TIMEZONE] has to be a string. We should show the user an error if that\u2019s not the case. We could validate this in Python or R but since we reuse scripts and keys in multiple places, tracking these validations can be time consuming and get out of control. Thus, we do these validations through a schema and check that schema before RAPIDS starts processing any data so the user can see the error right away. Keep in mind these validations can only cover certain base cases. Some validations that require more complex logic should still be done in the respective script. For example, we can check that a CSV file path actually ends in .csv but we can only check that the file actually exists in a Python script. The structure and values of the config.yaml file are validated using a YAML schema stored in tools/config.schema.yaml . Each key in config.yaml , for example PIDS , has a corresponding entry in the schema where we can validate its type, possible values, required properties, min and max values, among other things. The config.yaml is validated against the schema every time RAPIDS runs (see the top of the Snakefile ): validate ( config , \"tools/config.schema.yaml\" )","title":"Validation schema of config.yaml"},{"location":"developers/validation-schema-config/#structure-of-the-schema","text":"The schema has three main sections required , definitions , and properties . 
All of them are just nested key/value YAML pairs, where the value can be a primitive type ( integer , string , boolean , number ) or can be another key/value pair ( object ).","title":"Structure of the schema"},{"location":"developers/validation-schema-config/#required","text":"required lists properties that should be present in the config.yaml . We will almost always add every config.yaml key to this list (meaning that the user cannot delete any of those keys like TIMEZONE or PIDS ).","title":"required"},{"location":"developers/validation-schema-config/#definitions","text":"definitions lists key/values that are common to different properties so we can reuse them. You can define a key/value under definitions and use $ref to refer to it in any property . For example, every sensor like [PHONE_ACCELEROMETER] has one or more providers like RAPIDS and PANDA , these providers have some common properties like the COMPUTE flag or the SRC_SCRIPT string. Therefore we define a shared provider \u201ctemplate\u201d that is used by every provider and extended with properties exclusive to each one of them. For example: provider definition (template) The PROVIDER definition will be used later on different properties . PROVIDER : type : object required : [ COMPUTE , SRC_SCRIPT , FEATURES ] properties : COMPUTE : type : boolean FEATURES : type : [ array , object ] SRC_SCRIPT : type : string pattern : \"^.*\\\\.(py|R)$\" provider reusing and extending the template Notice that RAPIDS (a provider) uses and extends the PROVIDER template in this example. The FEATURES key is overriding the FEATURES key from the #/definitions/PROVIDER template but is keeping the validation for COMPUTE , and SRC_SCRIPT . For more details about reusing properties, go to this link PHONE_ACCELEROMETER : type : object # .. 
other properties PROVIDERS : type : [ \"null\" , object ] properties : RAPIDS : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : FEATURES : type : array uniqueItems : True items : type : string enum : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ]","title":"definitions"},{"location":"developers/validation-schema-config/#properties","text":"properties are nested key/values that describe the different components of our config.yaml file. Values can be of one or more primitive types like string , number , array , boolean and null . Values can also be another key/value pair (of type object ) that are similar to a dictionary in Python. For example, the following property validates the PIDS of our config.yaml . It checks that PIDS is an array with unique items of type string . PIDS : type : array uniqueItems : True items : type : string","title":"properties"},{"location":"developers/validation-schema-config/#modifying-the-schema","text":"Validating the config.yaml during development If you updated the schema and want to check the config.yaml is compliant, you can run the command snakemake --list-params-changes . You will see Building DAG of jobs... if there are no problems or an error message otherwise (try setting any COMPUTE flag to a string like test instead of False/True ). You can use this command without having to configure RAPIDS to process any participants or sensors. 
You can validate different aspects of each key/value in our config.yaml file: number/integer Including min and max values MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS : type : number minimum : 0 maximum : 1 FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD : type : integer exclusiveMinimum : 0 string Including valid values ( enum ) items : type : string enum : [ \"count\" , \"maxlux\" , \"minlux\" , \"avglux\" , \"medianlux\" , \"stdlux\" ] boolean MINUTES_DATA_USED : type : boolean array Including whether or not it should have unique values, the type of the array\u2019s elements ( strings , numbers ) and valid values ( enum ). MESSAGES_TYPES : type : array uniqueItems : True items : type : string enum : [ \"received\" , \"sent\" ] object PARENT is an object that has two properties. KID1 is one of those properties that are, in turn, another object that will reuse the \"#/definitions/PROVIDER\" definition AND also include (extend) two extra properties GRAND_KID1 of type array and GRAND_KID2 of type number . KID2 is another property of PARENT of type boolean . The schema validation looks like this PARENT : type : object properties : KID1 : allOf : - $ref : \"#/definitions/PROVIDER\" - properties : GRAND_KID1 : type : array uniqueItems : True GRAND_KID2 : type : number KID2 : type : boolean The config.yaml key that the previous schema validates looks like this: PARENT : KID1 : # These four come from the `PROVIDER` definition (template) COMPUTE : False FEATURES : [ x , y ] # an array SRC_SCRIPT : \"a path to a py or R script\" # This two come from the extension GRAND_KID1 : [ a , b ] # an array GRAND_KID2 : 5.1 # an number KID2 : True # a boolean","title":"Modifying the schema"},{"location":"developers/validation-schema-config/#verifying-the-schema-is-correct","text":"We recommend that before you start modifying the schema you modify the config.yaml key that you want to validate with an invalid value. 
For example, if you want to validate that COMPUTE is boolean, you set COMPUTE: 123 . Then create your validation, run snakemake --list-params-changes and make sure your validation fails (123 is not boolean ), and then set the key to the correct value. In other words, make sure it\u2019s broken first so that you know that your validation works. Warning Be careful . You can check that the schema config.schema.yaml has a valid format by running python tools/check_schema.py . You will see this message if its structure is correct: Schema is OK . However, we don\u2019t have a way to detect typos, for example allOf will work but allOF won\u2019t (capital F ) and it won\u2019t show any error. That\u2019s why we recommend to start with an invalid key/value in your config.yaml so that you can be sure the schema validation finds the problem.","title":"Verifying the schema is correct"},{"location":"developers/validation-schema-config/#useful-resources","text":"Read the following links to learn more about what we can validate with schemas. They are based on JSON instead of YAML schemas but the same concepts apply. Understanding JSON Schemas Specification of the JSON schema we use","title":"Useful resources"},{"location":"developers/virtual-environments/","text":"Python Virtual Environment \u00b6 Add new packages \u00b6 Try to install any new package using conda install -c CHANNEL PACKAGE_NAME (you can use pip if the package is only available there). Make sure your Python virtual environment is active ( conda activate YOUR_ENV ). 
Remove packages \u00b6 Uninstall packages using the same manager you used to install them conda remove PACKAGE_NAME or pip uninstall PACKAGE_NAME Updating all packages \u00b6 Make sure your Python virtual environment is active ( conda activate YOUR_ENV ), then run conda update --all Update your conda environment.yaml \u00b6 After installing or removing a package you can use the following command in your terminal to update your environment.yaml before publishing your pipeline. Note that we ignore the package version for libgfortran and mkl to keep compatibility with Linux: conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml R Virtual Environment \u00b6 Add new packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::install(\"PACKAGE_NAME\") Remove packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::remove(\"PACKAGE_NAME\") Updating all packages \u00b6 Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::update() Update your R renv.lock \u00b6 After installing or removing a package you can use the following command in your terminal to update your renv.lock before publishing your pipeline. Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::snapshot() (renv will ask you to confirm any updates to this file)","title":"Virtual Environments"},{"location":"developers/virtual-environments/#python-virtual-environment","text":"","title":"Python Virtual Environment"},{"location":"developers/virtual-environments/#add-new-packages","text":"Try to install any new package using conda install -c CHANNEL PACKAGE_NAME (you can use pip if the package is only available there). 
Make sure your Python virtual environment is active ( conda activate YOUR_ENV ).","title":"Add new packages"},{"location":"developers/virtual-environments/#remove-packages","text":"Uninstall packages using the same manager you used to install them conda remove PACKAGE_NAME or pip uninstall PACKAGE_NAME","title":"Remove packages"},{"location":"developers/virtual-environments/#updating-all-packages","text":"Make sure your Python virtual environment is active ( conda activate YOUR_ENV ), then run conda update --all","title":"Updating all packages"},{"location":"developers/virtual-environments/#update-your-conda-environmentyaml","text":"After installing or removing a package you can use the following command in your terminal to update your environment.yaml before publishing your pipeline. Note that we ignore the package version for libgfortran and mkl to keep compatibility with Linux: conda env export --no-builds | sed 's/^.*libgfortran.*$/ - libgfortran/' | sed 's/^.*mkl=.*$/ - mkl/' > environment.yml","title":"Update your conda environment.yaml"},{"location":"developers/virtual-environments/#r-virtual-environment","text":"","title":"R Virtual Environment"},{"location":"developers/virtual-environments/#add-new-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::install(\"PACKAGE_NAME\")","title":"Add new packages"},{"location":"developers/virtual-environments/#remove-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::remove(\"PACKAGE_NAME\")","title":"Remove packages"},{"location":"developers/virtual-environments/#updating-all-packages_1","text":"Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::update()","title":"Updating all packages"},{"location":"developers/virtual-environments/#update-your-r-renvlock","text":"After installing or removing a package you 
can use the following command in your terminal to update your renv.lock before publishing your pipeline. Open your terminal and navigate to RAPIDS\u2019 root folder Run R to open an R interactive session Run renv::snapshot() (renv will ask you to confirm any updates to this file)","title":"Update your R renv.lock"},{"location":"features/add-new-features/","text":"Add New Features \u00b6 Hint We recommend reading the Behavioral Features Introduction before reading this page. You can implement new features in Python or R scripts. You won\u2019t have to deal with time zones, dates, times, data cleaning, or preprocessing. The data that RAPIDS pipes to your feature extraction code are ready to process. New Features for Existing Sensors \u00b6 You can add new features to any existing sensors (see list below) by adding a new provider in three steps: Modify the config.yaml file Create your feature provider script Implement your features extraction code As a tutorial, we will add a new provider for PHONE_ACCELEROMETER called VEGA that extracts feature1 , feature2 , feature3 with a Python script that requires a parameter from the user called MY_PARAMETER . 
Existing Sensors An existing sensor of any device with a configuration entry in config.yaml : Smartphone (AWARE) Phone Accelerometer Phone Activity Recognition Phone Applications Crashes Phone Applications Foreground Phone Applications Notifications Phone Battery Phone Bluetooth Phone Calls Phone Conversation Phone Data Yield Phone Keyboard Phone Light Phone Locations Phone Log Phone Messages Phone Screen Phone WiFI Connected Phone WiFI Visible Fitbit Fitbit Data Yield Fitbit Heart Rate Summary Fitbit Heart Rate Intraday Fitbit Sleep Summary Fitbit Sleep Intraday Fitbit Steps Summary Fitbit Steps Intraday Empatica Empatica Accelerometer Empatica Heart Rate Empatica Temperature Empatica Electrodermal Activity Empatica Blood Volume Pulse Empatica Inter Beat Interval Empatica Tags Modify the config.yaml file \u00b6 In this step, you need to add your provider configuration section under the relevant sensor in config.yaml . See our example for our tutorial\u2019s VEGA provider for PHONE_ACCELEROMETER : Example configuration for a new accelerometer provider VEGA PHONE_ACCELEROMETER : CONTAINER : accelerometer PROVIDERS : RAPIDS : # this is a feature provider COMPUTE : False ... PANDA : # this is another feature provider COMPUTE : False ... VEGA : # this is our new feature provider COMPUTE : False FEATURES : [ \"feature1\" , \"feature2\" , \"feature3\" ] MY_PARAMTER : a_string SRC_SCRIPT : src/features/phone_accelerometer/vega/main.py Key Description [COMPUTE] Flag to activate/deactivate your provider [FEATURES] List of features your provider supports. Your provider code should only return the features on this list [MY_PARAMTER] An arbitrary parameter that our example provider VEGA needs. This can be a boolean, integer, float, string, or an array of any of such types. [SRC_SCRIPT] The relative path from RAPIDS\u2019 root folder to an script that computes the features for this provider. It can be implemented in R or Python. 
Create a feature provider script \u00b6 Create your feature Python or R script called main.py or main.R in the correct folder, src/feature/[sensorname]/[providername]/ . RAPIDS automatically loads and executes it based on the config key [SRC_SCRIPT] you added in the last step. For our example, this script is: src/feature/phone_accelerometer/vega/main.py Implement your feature extraction code \u00b6 Every feature script ( main.[py|R] ) needs a [providername]_features function with specific parameters. RAPIDS calls this function with the sensor data ready to process and with other functions and arguments you will need. Python function def [ providername ] _features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): # empty for now return ( your_features_df ) R function [ providername ] _ features <- function ( sensor_data , time_segment , provider ){ # empty for now return ( your_features_df ) } Parameter Description sensor_data_files Path to the CSV file containing the data of a single participant. This data has been cleaned and preprocessed. Your function will be automatically called for each participant in your study (in the [PIDS] array in config.yaml ) time_segment The label of the time segment that should be processed. provider The parameters you configured for your provider in config.yaml will be available in this variable as a dictionary in Python or a list in R. In our example this dictionary contains {MY_PARAMETER:\"a_string\"} filter_data_by_segment Python only. A function that you will use to filter your data. In R this function is already available in the environment. *args Python only. Not used for now **kwargs Python only. Not used for now The next step is to implement the code that computes your behavioral features in your provider script\u2019s function. As with any other script, this function can call other auxiliary methods, but in general terms, it should have three stages: 1. 
Read a participant\u2019s data by loading the CSV data stored in the file pointed by sensor_data_files acc_data = pd . read_csv ( sensor_data_files [ \"sensor_data\" ]) Note that the phone\u2019s battery, screen, and activity recognition data are given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) 2. Filter your data to process only those rows that belong to time_segment This step is only one line of code, but keep reading to understand why we need it. acc_data = filter_data_by_segment ( acc_data , time_segment ) You should use the filter_data_by_segment() function to process and group those rows that belong to each of the time segments RAPIDS could be configured with . Let\u2019s understand the filter_data_by_segment() function with an example. A RAPIDS user can extract features on any arbitrary time segment . A time segment is a period that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and weekend basis for p01 . The labels are arbitrary, and the instances depend on the days a participant was monitored for: the daily segment could be named my_days and if p01 was monitored for 14 days, it would have 14 instances the weekly segment could be named my_weeks and if p01 was monitored for 14 days, it would have 2 instances. the weekend segment could be named my_weekends and if p01 was monitored for 14 days, it would have 2 instances. For this example, RAPIDS will call your provider function three times for p01 , once where time_segment is my_days , once where time_segment is my_weeks , and once where time_segment is my_weekends . In this example, not every row in p01 \u2018s data needs to take part in the feature computation for either segment and the rows need to be grouped differently. 
Thus filter_data_by_segment() comes in handy, it will return a data frame that contains the rows that were logged during a time segment plus an extra column called local_segment . This new column will have as many unique values as time segment instances exist (14, 2, and 2 for our p01 \u2018s my_days , my_weeks , and my_weekends examples). After filtering, you should group the data frame by this column and compute any desired features , for example: acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () The reason RAPIDS does not filter the participant\u2019s data set for you is because your code might need to compute something based on a participant\u2019s complete dataset before computing their features. For example, you might want to identify the number that called a participant the most throughout the study before computing a feature with the number of calls the participant received from that number. 3. Return a data frame with your features After filtering, grouping your data, and computing your features, your provider function should return a data frame that has: One row per time segment instance (e.g., 14 our p01 \u2018s my_days example) The local_segment column added by filter_data_by_segment() One column per feature. The name of your features should only contain letters or numbers ( feature1 ) by convention. RAPIDS automatically adds the correct sensor and provider prefix; in our example, this prefix is phone_accelerometr_vega_ . PHONE_ACCELEROMETER Provider Example For your reference, this our own provider ( RAPIDS ) for PHONE_ACCELEROMETER that computes five acceleration features import pandas as pd import numpy as np def rapids_features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): acc_data = pd . 
read_csv ( sensor_data_files [ \"sensor_data\" ]) requested_features = provider [ \"FEATURES\" ] # name of the features this function can compute base_features_names = [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] # the subset of requested features this function can compute features_to_compute = list ( set ( requested_features ) & set ( base_features_names )) acc_features = pd . DataFrame ( columns = [ \"local_segment\" ] + features_to_compute ) if not acc_data . empty : acc_data = filter_data_by_segment ( acc_data , time_segment ) if not acc_data . empty : acc_features = pd . DataFrame () # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) magnitude = acc_data . apply ( lambda row : np . sqrt ( row [ \"double_values_0\" ] ** 2 + row [ \"double_values_1\" ] ** 2 + row [ \"double_values_2\" ] ** 2 ), axis = 1 ) acc_data = acc_data . assign ( magnitude = magnitude . values ) if \"maxmagnitude\" in features_to_compute : acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () if \"minmagnitude\" in features_to_compute : acc_features [ \"minmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . min () if \"avgmagnitude\" in features_to_compute : acc_features [ \"avgmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . mean () if \"medianmagnitude\" in features_to_compute : acc_features [ \"medianmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . median () if \"stdmagnitude\" in features_to_compute : acc_features [ \"stdmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . std () acc_features = acc_features . 
reset_index () return acc_features New Features for Non-Existing Sensors \u00b6 If you want to add features for a device or a sensor that we do not support at the moment (those that do not appear in the \"Existing Sensors\" list above), contact us or request it on Slack and we can add the necessary code so you can follow the instructions above.","title":"Add New Features"},{"location":"features/add-new-features/#add-new-features","text":"Hint We recommend reading the Behavioral Features Introduction before reading this page. You can implement new features in Python or R scripts. You won\u2019t have to deal with time zones, dates, times, data cleaning, or preprocessing. The data that RAPIDS pipes to your feature extraction code are ready to process.","title":"Add New Features"},{"location":"features/add-new-features/#new-features-for-existing-sensors","text":"You can add new features to any existing sensors (see list below) by adding a new provider in three steps: Modify the config.yaml file Create your feature provider script Implement your features extraction code As a tutorial, we will add a new provider for PHONE_ACCELEROMETER called VEGA that extracts feature1 , feature2 , feature3 with a Python script that requires a parameter from the user called MY_PARAMETER . 
Existing Sensors An existing sensor of any device with a configuration entry in config.yaml : Smartphone (AWARE) Phone Accelerometer Phone Activity Recognition Phone Applications Crashes Phone Applications Foreground Phone Applications Notifications Phone Battery Phone Bluetooth Phone Calls Phone Conversation Phone Data Yield Phone Keyboard Phone Light Phone Locations Phone Log Phone Messages Phone Screen Phone WiFI Connected Phone WiFI Visible Fitbit Fitbit Data Yield Fitbit Heart Rate Summary Fitbit Heart Rate Intraday Fitbit Sleep Summary Fitbit Sleep Intraday Fitbit Steps Summary Fitbit Steps Intraday Empatica Empatica Accelerometer Empatica Heart Rate Empatica Temperature Empatica Electrodermal Activity Empatica Blood Volume Pulse Empatica Inter Beat Interval Empatica Tags","title":"New Features for Existing Sensors"},{"location":"features/add-new-features/#modify-the-configyaml-file","text":"In this step, you need to add your provider configuration section under the relevant sensor in config.yaml . See our example for our tutorial\u2019s VEGA provider for PHONE_ACCELEROMETER : Example configuration for a new accelerometer provider VEGA PHONE_ACCELEROMETER : CONTAINER : accelerometer PROVIDERS : RAPIDS : # this is a feature provider COMPUTE : False ... PANDA : # this is another feature provider COMPUTE : False ... VEGA : # this is our new feature provider COMPUTE : False FEATURES : [ \"feature1\" , \"feature2\" , \"feature3\" ] MY_PARAMTER : a_string SRC_SCRIPT : src/features/phone_accelerometer/vega/main.py Key Description [COMPUTE] Flag to activate/deactivate your provider [FEATURES] List of features your provider supports. Your provider code should only return the features on this list [MY_PARAMTER] An arbitrary parameter that our example provider VEGA needs. This can be a boolean, integer, float, string, or an array of any of such types. [SRC_SCRIPT] The relative path from RAPIDS\u2019 root folder to an script that computes the features for this provider. 
It can be implemented in R or Python.","title":"Modify the config.yaml file"},{"location":"features/add-new-features/#create-a-feature-provider-script","text":"Create your feature Python or R script called main.py or main.R in the correct folder, src/feature/[sensorname]/[providername]/ . RAPIDS automatically loads and executes it based on the config key [SRC_SCRIPT] you added in the last step. For our example, this script is: src/feature/phone_accelerometer/vega/main.py","title":"Create a feature provider script"},{"location":"features/add-new-features/#implement-your-feature-extraction-code","text":"Every feature script ( main.[py|R] ) needs a [providername]_features function with specific parameters. RAPIDS calls this function with the sensor data ready to process and with other functions and arguments you will need. Python function def [ providername ] _features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): # empty for now return ( your_features_df ) R function [ providername ] _ features <- function ( sensor_data , time_segment , provider ){ # empty for now return ( your_features_df ) } Parameter Description sensor_data_files Path to the CSV file containing the data of a single participant. This data has been cleaned and preprocessed. Your function will be automatically called for each participant in your study (in the [PIDS] array in config.yaml ) time_segment The label of the time segment that should be processed. provider The parameters you configured for your provider in config.yaml will be available in this variable as a dictionary in Python or a list in R. In our example this dictionary contains {MY_PARAMETER:\"a_string\"} filter_data_by_segment Python only. A function that you will use to filter your data. In R this function is already available in the environment. *args Python only. Not used for now **kwargs Python only. 
Not used for now The next step is to implement the code that computes your behavioral features in your provider script\u2019s function. As with any other script, this function can call other auxiliary methods, but in general terms, it should have three stages: 1. Read a participant\u2019s data by loading the CSV data stored in the file pointed by sensor_data_files acc_data = pd . read_csv ( sensor_data_files [ \"sensor_data\" ]) Note that the phone\u2019s battery, screen, and activity recognition data are given as episodes instead of event rows (for example, start and end timestamps of the periods the phone screen was on) 2. Filter your data to process only those rows that belong to time_segment This step is only one line of code, but keep reading to understand why we need it. acc_data = filter_data_by_segment ( acc_data , time_segment ) You should use the filter_data_by_segment() function to process and group those rows that belong to each of the time segments RAPIDS could be configured with . Let\u2019s understand the filter_data_by_segment() function with an example. A RAPIDS user can extract features on any arbitrary time segment . A time segment is a period that has a label and one or more instances. For example, the user (or you) could have requested features on a daily, weekly, and weekend basis for p01 . The labels are arbitrary, and the instances depend on the days a participant was monitored for: the daily segment could be named my_days and if p01 was monitored for 14 days, it would have 14 instances the weekly segment could be named my_weeks and if p01 was monitored for 14 days, it would have 2 instances. the weekend segment could be named my_weekends and if p01 was monitored for 14 days, it would have 2 instances. For this example, RAPIDS will call your provider function three times for p01 , once where time_segment is my_days , once where time_segment is my_weeks , and once where time_segment is my_weekends . 
In this example, not every row in p01 \u2018s data needs to take part in the feature computation for either segment and the rows need to be grouped differently. Thus filter_data_by_segment() comes in handy, it will return a data frame that contains the rows that were logged during a time segment plus an extra column called local_segment . This new column will have as many unique values as time segment instances exist (14, 2, and 2 for our p01 \u2018s my_days , my_weeks , and my_weekends examples). After filtering, you should group the data frame by this column and compute any desired features , for example: acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () The reason RAPIDS does not filter the participant\u2019s data set for you is because your code might need to compute something based on a participant\u2019s complete dataset before computing their features. For example, you might want to identify the number that called a participant the most throughout the study before computing a feature with the number of calls the participant received from that number. 3. Return a data frame with your features After filtering, grouping your data, and computing your features, your provider function should return a data frame that has: One row per time segment instance (e.g., 14 our p01 \u2018s my_days example) The local_segment column added by filter_data_by_segment() One column per feature. The name of your features should only contain letters or numbers ( feature1 ) by convention. RAPIDS automatically adds the correct sensor and provider prefix; in our example, this prefix is phone_accelerometr_vega_ . PHONE_ACCELEROMETER Provider Example For your reference, this our own provider ( RAPIDS ) for PHONE_ACCELEROMETER that computes five acceleration features import pandas as pd import numpy as np def rapids_features ( sensor_data_files , time_segment , provider , filter_data_by_segment , * args , ** kwargs ): acc_data = pd . 
read_csv ( sensor_data_files [ \"sensor_data\" ]) requested_features = provider [ \"FEATURES\" ] # name of the features this function can compute base_features_names = [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] # the subset of requested features this function can compute features_to_compute = list ( set ( requested_features ) & set ( base_features_names )) acc_features = pd . DataFrame ( columns = [ \"local_segment\" ] + features_to_compute ) if not acc_data . empty : acc_data = filter_data_by_segment ( acc_data , time_segment ) if not acc_data . empty : acc_features = pd . DataFrame () # get magnitude related features: magnitude = sqrt(x^2+y^2+z^2) magnitude = acc_data . apply ( lambda row : np . sqrt ( row [ \"double_values_0\" ] ** 2 + row [ \"double_values_1\" ] ** 2 + row [ \"double_values_2\" ] ** 2 ), axis = 1 ) acc_data = acc_data . assign ( magnitude = magnitude . values ) if \"maxmagnitude\" in features_to_compute : acc_features [ \"maxmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . max () if \"minmagnitude\" in features_to_compute : acc_features [ \"minmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . min () if \"avgmagnitude\" in features_to_compute : acc_features [ \"avgmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . mean () if \"medianmagnitude\" in features_to_compute : acc_features [ \"medianmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . median () if \"stdmagnitude\" in features_to_compute : acc_features [ \"stdmagnitude\" ] = acc_data . groupby ([ \"local_segment\" ])[ \"magnitude\" ] . std () acc_features = acc_features . 
reset_index () return acc_features","title":"Implement your feature extraction code"},{"location":"features/add-new-features/#new-features-for-non-existing-sensors","text":"If you want to add features for a device or a sensor that we do not support at the moment (those that do not appear in the \"Existing Sensors\" list above), contact us or request it on Slack and we can add the necessary code so you can follow the instructions above.","title":"New Features for Non-Existing Sensors"},{"location":"features/empatica-accelerometer/","text":"Empatica Accelerometer \u00b6 Sensor parameters description for [EMPATICA_ACCELEROMETER] : Key Description [CONTAINER] Name of the CSV file containing accelerometer data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_accelerometer_raw.csv - data/raw/ { pid } /empatica_accelerometer_with_datetime.csv - data/interim/ { pid } /empatica_accelerometer_features/empatica_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_accelerometer.csv Parameters description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ACCELEROMETER features from the DBDP provider [FEATURES] Features to be computed, see table below Features description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. 
Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem, if this is something you need, get in touch and we can discuss how to implement it.","title":"Empatica Accelerometer"},{"location":"features/empatica-accelerometer/#empatica-accelerometer","text":"Sensor parameters description for [EMPATICA_ACCELEROMETER] : Key Description [CONTAINER] Name of the CSV file containing accelerometer data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Accelerometer"},{"location":"features/empatica-accelerometer/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_accelerometer_raw.csv - data/raw/ { pid } /empatica_accelerometer_with_datetime.csv - data/interim/ { pid } /empatica_accelerometer_features/empatica_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_accelerometer.csv Parameters description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ACCELEROMETER features from the DBDP provider [FEATURES] Features to be computed, see table below Features description for [EMPATICA_ACCELEROMETER][PROVIDERS][DBDP] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. 
Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem, if this is something you need, get in touch and we can discuss how to implement it.","title":"DBDP provider"},{"location":"features/empatica-blood-volume-pulse/","text":"Empatica Blood Volume Pulse \u00b6 Sensor parameters description for [EMPATICA_BLOOD_VOLUME_PULSE] : Key Description [CONTAINER] Name of the CSV file containing blood volume pulse data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_blood_volume_pulse_raw.csv - data/raw/ { pid } /empatica_blood_volume_pulse_with_datetime.csv - data/interim/ { pid } /empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_blood_volume_pulse.csv Parameters description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_BLOOD_VOLUME_PULSE features from the DBDP provider [FEATURES] Features to be computed from blood volume pulse intraday data, see table below Features description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Feature Units Description maxbvp - The maximum blood volume pulse during a time segment. minbvp - The minimum blood volume pulse during a time segment. avgbvp - The average blood volume pulse during a time segment. medianbvp - The median of blood volume pulse during a time segment. modebvp - The mode of blood volume pulse during a time segment. stdbvp - The standard deviation of blood volume pulse during a time segment. 
diffmaxmodebvp - The difference between the maximum and mode blood volume pulse during a time segment. diffminmodebvp - The difference between the mode and minimum blood volume pulse during a time segment. entropybvp nats Shannon\u2019s entropy measurement based on blood volume pulse during a time segment. Assumptions/Observations For more information about BVP read this .","title":"Empatica Blood Volume Pulse"},{"location":"features/empatica-blood-volume-pulse/#empatica-blood-volume-pulse","text":"Sensor parameters description for [EMPATICA_BLOOD_VOLUME_PULSE] : Key Description [CONTAINER] Name of the CSV file containing blood volume pulse data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Blood Volume Pulse"},{"location":"features/empatica-blood-volume-pulse/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_blood_volume_pulse_raw.csv - data/raw/ { pid } /empatica_blood_volume_pulse_with_datetime.csv - data/interim/ { pid } /empatica_blood_volume_pulse_features/empatica_blood_volume_pulse_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_blood_volume_pulse.csv Parameters description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_BLOOD_VOLUME_PULSE features from the DBDP provider [FEATURES] Features to be computed from blood volume pulse intraday data, see table below Features description for [EMPATICA_BLOOD_VOLUME_PULSE][PROVIDERS][DBDP] : Feature Units Description maxbvp - The maximum blood volume pulse during a time segment. minbvp - The minimum blood volume pulse during a time segment. avgbvp - The average blood volume pulse during a time segment. medianbvp - The median of blood volume pulse during a time segment. 
modebvp - The mode of blood volume pulse during a time segment. stdbvp - The standard deviation of blood volume pulse during a time segment. diffmaxmodebvp - The difference between the maximum and mode blood volume pulse during a time segment. diffminmodebvp - The difference between the mode and minimum blood volume pulse during a time segment. entropybvp nats Shannon\u2019s entropy measurement based on blood volume pulse during a time segment. Assumptions/Observations For more information about BVP read this .","title":"DBDP provider"},{"location":"features/empatica-electrodermal-activity/","text":"Empatica Electrodermal Activity \u00b6 Sensor parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY] : Key Description [CONTAINER] Name of the CSV file containing electrodermal activity data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_electrodermal_activity_raw.csv - data/raw/ { pid } /empatica_electrodermal_activity_with_datetime.csv - data/interim/ { pid } /empatica_electrodermal_activity_features/empatica_electrodermal_activity_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_electrodermal_activity.csv Parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ELECTRODERMAL_ACTIVITY features from the DBDP provider [FEATURES] Features to be computed from electrodermal activity intraday data, see table below Features description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Feature Units Description maxeda microsiemens The maximum electrical conductance during a time segment. mineda microsiemens The minimum electrical conductance during a time segment. 
avgeda microsiemens The average electrical conductance during a time segment. medianeda microsiemens The median of electrical conductance during a time segment. modeeda microsiemens The mode of electrical conductance during a time segment. stdeda microsiemens The standard deviation of electrical conductance during a time segment. diffmaxmodeeda microsiemens The difference between the maximum and mode electrical conductance during a time segment. diffminmodeeda microsiemens The difference between the mode and minimum electrical conductance during a time segment. entropyeda nats Shannon\u2019s entropy measurement based on electrical conductance during a time segment. Assumptions/Observations None","title":"Empatica Electrodermal Activity"},{"location":"features/empatica-electrodermal-activity/#empatica-electrodermal-activity","text":"Sensor parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY] : Key Description [CONTAINER] Name of the CSV file containing electrodermal activity data that is compressed inside an Empatica zip file. 
Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Electrodermal Activity"},{"location":"features/empatica-electrodermal-activity/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_electrodermal_activity_raw.csv - data/raw/ { pid } /empatica_electrodermal_activity_with_datetime.csv - data/interim/ { pid } /empatica_electrodermal_activity_features/empatica_electrodermal_activity_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_electrodermal_activity.csv Parameters description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_ELECTRODERMAL_ACTIVITY features from the DBDP provider [FEATURES] Features to be computed from electrodermal activity intraday data, see table below Features description for [EMPATICA_ELECTRODERMAL_ACTIVITY][PROVIDERS][DBDP] : Feature Units Description maxeda microsiemens The maximum electrical conductance during a time segment. mineda microsiemens The minimum electrical conductance during a time segment. avgeda microsiemens The average electrical conductance during a time segment. medianeda microsiemens The median of electrical conductance during a time segment. modeeda microsiemens The mode of electrical conductance during a time segment. stdeda microsiemens The standard deviation of electrical conductance during a time segment. diffmaxmodeeda microsiemens The difference between the maximum and mode electrical conductance during a time segment. diffminmodeeda microsiemens The difference between the mode and minimum electrical conductance during a time segment. entropyeda nats Shannon\u2019s entropy measurement based on electrical conductance during a time segment. 
Assumptions/Observations None","title":"DBDP provider"},{"location":"features/empatica-heartrate/","text":"Empatica Heart Rate \u00b6 Sensor parameters description for [EMPATICA_HEARTRATE] : Key Description [CONTAINER] Name of the CSV file containing heart rate data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_heartrate_raw.csv - data/raw/ { pid } /empatica_heartrate_with_datetime.csv - data/interim/ { pid } /empatica_heartrate_features/empatica_heartrate_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_heartrate.csv Parameters description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_HEARTRATE features from the DBDP provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Feature Units Description maxhr beats The maximum heart rate during a time segment. minhr beats The minimum heart rate during a time segment. avghr beats The average heart rate during a time segment. medianhr beats The median of heart rate during a time segment. modehr beats The mode of heart rate during a time segment. stdhr beats The standard deviation of heart rate during a time segment. diffmaxmodehr beats The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. 
Assumptions/Observations We extract the previous features based on the average heart rate values computed in 10-second windows .","title":"Empatica Heart Rate"},{"location":"features/empatica-heartrate/#empatica-heart-rate","text":"Sensor parameters description for [EMPATICA_HEARTRATE] : Key Description [CONTAINER] Name of the CSV file containing heart rate data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Heart Rate"},{"location":"features/empatica-heartrate/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_heartrate_raw.csv - data/raw/ { pid } /empatica_heartrate_with_datetime.csv - data/interim/ { pid } /empatica_heartrate_features/empatica_heartrate_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_heartrate.csv Parameters description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_HEARTRATE features from the DBDP provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [EMPATICA_HEARTRATE][PROVIDERS][DBDP] : Feature Units Description maxhr beats The maximum heart rate during a time segment. minhr beats The minimum heart rate during a time segment. avghr beats The average heart rate during a time segment. medianhr beats The median of heart rate during a time segment. modehr beats The mode of heart rate during a time segment. stdhr beats The standard deviation of heart rate during a time segment. diffmaxmodehr beats The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. 
Assumptions/Observations We extract the previous features based on the average heart rate values computed in 10-second windows .","title":"DBDP provider"},{"location":"features/empatica-inter-beat-interval/","text":"Empatica Inter Beat Interval \u00b6 Sensor parameters description for [EMPATICA_INTER_BEAT_INTERVAL] : Key Description [CONTAINER] Name of the CSV file containing inter beat interval data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_inter_beat_interval_raw.csv - data/raw/ { pid } /empatica_inter_beat_interval_with_datetime.csv - data/interim/ { pid } /empatica_inter_beat_interval_features/empatica_inter_beat_interval_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_inter_beat_interval.csv Parameters description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_INTER_BEAT_INTERVAL features from the DBDP provider [FEATURES] Features to be computed from inter beat interval intraday data, see table below Features description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Feature Units Description maxibi seconds The maximum inter beat interval during a time segment. minibi seconds The minimum inter beat interval during a time segment. avgibi seconds The average inter beat interval during a time segment. medianibi seconds The median of inter beat interval during a time segment. modeibi seconds The mode of inter beat interval during a time segment. stdibi seconds The standard deviation of inter beat interval during a time segment. diffmaxmodeibi seconds The difference between the maximum and mode inter beat interval during a time segment. 
diffminmodeibi seconds The difference between the mode and minimum inter beat interval during a time segment. entropyibi nats Shannon\u2019s entropy measurement based on inter beat interval during a time segment. Assumptions/Observations For more information about IBI read this .","title":"Empatica Inter Beat Interval"},{"location":"features/empatica-inter-beat-interval/#empatica-inter-beat-interval","text":"Sensor parameters description for [EMPATICA_INTER_BEAT_INTERVAL] : Key Description [CONTAINER] Name of the CSV file containing inter beat interval data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Inter Beat Interval"},{"location":"features/empatica-inter-beat-interval/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_inter_beat_interval_raw.csv - data/raw/ { pid } /empatica_inter_beat_interval_with_datetime.csv - data/interim/ { pid } /empatica_inter_beat_interval_features/empatica_inter_beat_interval_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_inter_beat_interval.csv Parameters description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_INTER_BEAT_INTERVAL features from the DBDP provider [FEATURES] Features to be computed from inter beat interval intraday data, see table below Features description for [EMPATICA_INTER_BEAT_INTERVAL][PROVIDERS][DBDP] : Feature Units Description maxibi seconds The maximum inter beat interval during a time segment. minibi seconds The minimum inter beat interval during a time segment. avgibi seconds The average inter beat interval during a time segment. medianibi seconds The median of inter beat interval during a time segment. modeibi seconds The mode of inter beat interval during a time segment. 
stdibi seconds The standard deviation of inter beat interval during a time segment. diffmaxmodeibi seconds The difference between the maximum and mode inter beat interval during a time segment. diffminmodeibi seconds The difference between the mode and minimum inter beat interval during a time segment. entropyibi nats Shannon\u2019s entropy measurement based on inter beat interval during a time segment. Assumptions/Observations For more information about IBI read this .","title":"DBDP provider"},{"location":"features/empatica-tags/","text":"Empatica Tags \u00b6 Sensor parameters description for [EMPATICA_TAGS] : Key Description [CONTAINER] Name of the CSV file containing tags data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. Note No feature providers have been implemented for this sensor yet, however you can implement your own features . To know more about tags read this .","title":"Empatica Tags"},{"location":"features/empatica-tags/#empatica-tags","text":"Sensor parameters description for [EMPATICA_TAGS] : Key Description [CONTAINER] Name of the CSV file containing tags data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. Note No feature providers have been implemented for this sensor yet, however you can implement your own features . To know more about tags read this .","title":"Empatica Tags"},{"location":"features/empatica-temperature/","text":"Empatica Temperature \u00b6 Sensor parameters description for [EMPATICA_TEMPERATURE] : Key Description [CONTAINER] Name of the CSV file containing temperature data that is compressed inside an Empatica zip file. Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute. 
DBDP provider \u00b6 Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_temperature_raw.csv - data/raw/ { pid } /empatica_temperature_with_datetime.csv - data/interim/ { pid } /empatica_temperature_features/empatica_temperature_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_temperature.csv Parameters description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_TEMPERATURE features from the DBDP provider [FEATURES] Features to be computed from temperature intraday data, see table below Features description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Feature Units Description maxtemp degrees C The maximum temperature during a time segment. mintemp degrees C The minimum temperature during a time segment. avgtemp degrees C The average temperature during a time segment. mediantemp degrees C The median of temperature during a time segment. modetemp degrees C The mode of temperature during a time segment. stdtemp degrees C The standard deviation of temperature during a time segment. diffmaxmodetemp degrees C The difference between the maximum and mode temperature during a time segment. diffminmodetemp degrees C The difference between the mode and minimum temperature during a time segment. entropytemp nats Shannon\u2019s entropy measurement based on temperature during a time segment. Assumptions/Observations None","title":"Empatica Temperature"},{"location":"features/empatica-temperature/#empatica-temperature","text":"Sensor parameters description for [EMPATICA_TEMPERATURE] : Key Description [CONTAINER] Name of the CSV file containing temperature data that is compressed inside an Empatica zip file. 
Since these zip files are created automatically by Empatica, there is no need to change the value of this attribute.","title":"Empatica Temperature"},{"location":"features/empatica-temperature/#dbdp-provider","text":"Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /empatica_temperature_raw.csv - data/raw/ { pid } /empatica_temperature_with_datetime.csv - data/interim/ { pid } /empatica_temperature_features/empatica_temperature_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /empatica_temperature.csv Parameters description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Key Description [COMPUTE] Set to True to extract EMPATICA_TEMPERATURE features from the DBDP provider [FEATURES] Features to be computed from temperature intraday data, see table below Features description for [EMPATICA_TEMPERATURE][PROVIDERS][DBDP] : Feature Units Description maxtemp degrees C The maximum temperature during a time segment. mintemp degrees C The minimum temperature during a time segment. avgtemp degrees C The average temperature during a time segment. mediantemp degrees C The median of temperature during a time segment. modetemp degrees C The mode of temperature during a time segment. stdtemp degrees C The standard deviation of temperature during a time segment. diffmaxmodetemp degrees C The difference between the maximum and mode temperature during a time segment. diffminmodetemp degrees C The difference between the mode and minimum temperature during a time segment. entropytemp nats Shannon\u2019s entropy measurement based on temperature during a time segment. Assumptions/Observations None","title":"DBDP provider"},{"location":"features/feature-introduction/","text":"Behavioral Features Introduction \u00b6 A behavioral feature is a metric computed from raw sensor data quantifying the behavior of a participant. For example, the time spent at home computed based on location data. 
These are also known as digital biomarkers. RAPIDS\u2019 config.yaml has a section for each supported device/sensor (e.g., PHONE_ACCELEROMETER , FITBIT_STEPS , EMPATICA_HEARTRATE ). These sections follow a similar structure, and they can have one or more feature PROVIDERS , that compute one or more behavioral features. You will modify the parameters of these PROVIDERS to obtain features from different mobile sensors. We\u2019ll use PHONE_ACCELEROMETER as an example to explain this further. Hint We recommend reading this page if you are using RAPIDS for the first time All computed sensor features are stored under /data/processed/features on files per sensor, per participant and per study (all participants). Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. In short, to extract features offered by a provider, you need to set its [COMPUTE] flag to TRUE , configure any of its parameters, and execute RAPIDS. Explaining the config.yaml sensor sections with an example \u00b6 Each sensor section follows the same structure. Click on the numbered markers to know more. PHONE_ACCELEROMETER : # (1) CONTAINER : accelerometer # (2) PROVIDERS : # (3) RAPIDS : COMPUTE : False # (4) FEATURES : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] SRC_SCRIPT : src/features/phone_accelerometer/rapids/main.py PANDA : COMPUTE : False VALID_SENSED_MINUTES : False FEATURES : # (5) exertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] nonexertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] # (6) SRC_SCRIPT : src/features/phone_accelerometer/panda/main.py Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) 
has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. 
panda . These are the descriptions of each marker for accessibility: Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for RAPIDS provider. For others, they are grouped in a collection of arrays, like those for PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. 
PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Introduction"},{"location":"features/feature-introduction/#behavioral-features-introduction","text":"A behavioral feature is a metric computed from raw sensor data quantifying the behavior of a participant. For example, the time spent at home computed based on location data. These are also known as digital biomarkers. RAPIDS\u2019 config.yaml has a section for each supported device/sensor (e.g., PHONE_ACCELEROMETER , FITBIT_STEPS , EMPATICA_HEARTRATE ). These sections follow a similar structure, and they can have one or more feature PROVIDERS , that compute one or more behavioral features. You will modify the parameters of these PROVIDERS to obtain features from different mobile sensors. We\u2019ll use PHONE_ACCELEROMETER as an example to explain this further. Hint We recommend reading this page if you are using RAPIDS for the first time All computed sensor features are stored under /data/processed/features on files per sensor, per participant and per study (all participants). Every time you change any sensor parameters, provider parameters or provider features, all the necessary files will be updated as soon as you execute RAPIDS. In short, to extract features offered by a provider, you need to set its [COMPUTE] flag to TRUE , configure any of its parameters, and execute RAPIDS.","title":"Behavioral Features Introduction"},{"location":"features/feature-introduction/#explaining-the-configyaml-sensor-sections-with-an-example","text":"Each sensor section follows the same structure. Click on the numbered markers to know more. 
PHONE_ACCELEROMETER : # (1) CONTAINER : accelerometer # (2) PROVIDERS : # (3) RAPIDS : COMPUTE : False # (4) FEATURES : [ \"maxmagnitude\" , \"minmagnitude\" , \"avgmagnitude\" , \"medianmagnitude\" , \"stdmagnitude\" ] SRC_SCRIPT : src/features/phone_accelerometer/rapids/main.py PANDA : COMPUTE : False VALID_SENSED_MINUTES : False FEATURES : # (5) exertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] nonexertional_activity_episode : [ \"sumduration\" , \"maxduration\" , \"minduration\" , \"avgduration\" , \"medianduration\" , \"stdduration\" ] # (6) SRC_SCRIPT : src/features/phone_accelerometer/panda/main.py Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. 
We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for the RAPIDS provider. For others, they are grouped in a collection of arrays, like those for the PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda . These are the descriptions of each marker for accessibility: Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled, and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . 
PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for the RAPIDS provider. For others, they are grouped in a collection of arrays, like those for the PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Explaining the config.yaml sensor sections with an example"},{"location":"features/fitbit-data-yield/","text":"Fitbit Data Yield \u00b6 We use Fitbit heart rate intraday data to extract data yield features. Fitbit data yield features can be used to remove rows ( time segments ) that do not contain enough Fitbit data. You should decide what is your \u201cenough\u201d threshold depending on the time a participant was supposed to be wearing their Fitbit, the length of your study, and the rates of missing data that your analysis could handle. Why is Fitbit data yield important? Imagine that you want to extract FITBIT_STEPS_SUMMARY features on daily segments ( 00:00 to 23:59 ). 
Let\u2019s say that on day 1 the Fitbit logged 6k as the total step count and the heart rate sensor logged 24 hours of data and on day 2 the Fitbit logged 101 as the total step count and the heart rate sensor logged 2 hours of data. It\u2019s very likely that on day 2 you walked during the other 22 hours so including this day in your analysis could bias your results. Sensor parameters description for [FITBIT_DATA_YIELD] : Key Description [SENSORS] The Fitbit sensor we considered for calculating the Fitbit data yield features. We only support FITBIT_HEARTRATE_INTRADAY since sleep data is commonly collected only overnight, and step counts are 0 even when not wearing the Fitbit device. RAPIDS provider \u00b6 Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when Fitbit heart rate intraday sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_data_yield_features/fitbit_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_data_yield.csv Parameters description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. 
Features description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: the 12 missing hours are from the beginning of the segment or 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a . We assume your Fitbit intraday data was sampled (requested from the Fitbit API) at 1 minute intervals, if the interval is longer, for example 15 minutes, you need to take into account that valid minutes and valid hours ratios are going to be small (for example you would have at most 4 \u201cminutes\u201d of data per hour because you would have four 15-minute windows) and so you should adjust your thresholds to include and exclude rows accordingly. 
If you are in this situation, get in touch with us, we could implement this use case but we are not sure there is enough demand for it at the moment since you can control the sampling rate of the data you request from Fitbit API.","title":"Fitbit Data Yield"},{"location":"features/fitbit-data-yield/#fitbit-data-yield","text":"We use Fitbit heart rate intraday data to extract data yield features. Fitbit data yield features can be used to remove rows ( time segments ) that do not contain enough Fitbit data. You should decide what is your \u201cenough\u201d threshold depending on the time a participant was supposed to be wearing their Fitbit, the length of your study, and the rates of missing data that your analysis could handle. Why is Fitbit data yield important? Imagine that you want to extract FITBIT_STEPS_SUMMARY features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the Fitbit logged 6k as the total step count and the heart rate sensor logged 24 hours of data and on day 2 the Fitbit logged 101 as the total step count and the heart rate sensor logged 2 hours of data. It\u2019s very likely that on day 2 you walked during the other 22 hours so including this day in your analysis could bias your results. Sensor parameters description for [FITBIT_DATA_YIELD] : Key Description [SENSORS] The Fitbit sensor we considered for calculating the Fitbit data yield features. We only support FITBIT_HEARTRATE_INTRADAY since sleep data is commonly collected only overnight, and step counts are 0 even when not wearing the Fitbit device.","title":"Fitbit Data Yield"},{"location":"features/fitbit-data-yield/#rapids-provider","text":"Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when Fitbit heart rate intraday sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. 
The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] Available time segments and platforms Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_data_yield_features/fitbit_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_data_yield.csv Parameters description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. Features description for [FITBIT_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: the 12 missing hours are from the beginning of the segment or 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). 
However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a . We assume your Fitbit intraday data was sampled (requested from the Fitbit API) at 1 minute intervals, if the interval is longer, for example 15 minutes, you need to take into account that valid minutes and valid hours ratios are going to be small (for example you would have at most 4 \u201cminutes\u201d of data per hour because you would have four 15-minute windows) and so you should adjust your thresholds to include and exclude rows accordingly. If you are in this situation, get in touch with us, we could implement this use case but we are not sure there is enough demand for it at the moment since you can control the sampling rate of the data you request from Fitbit API.","title":"RAPIDS provider"},{"location":"features/fitbit-heartrate-intraday/","text":"Fitbit Heart Rate Intraday \u00b6 Sensor parameters description for [FITBIT_HEARTRATE_INTRADAY] : Key Description [CONTAINER] Container where your heart rate intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_intraday.csv Parameters description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description maxhr beats/mins The maximum heart rate during a time segment. minhr beats/mins The minimum heart rate during a time segment. avghr beats/mins The average heart rate during a time segment. medianhr beats/mins The median of heart rate during a time segment. modehr beats/mins The mode of heart rate during a time segment. stdhr beats/mins The standard deviation of heart rate during a time segment. diffmaxmodehr beats/mins The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats/mins The difference between the mode and minimum heart rate during a time segment. entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. minutesonZONE minutes Number of minutes the user\u2019s heart rate fell within each heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . 
Please refer to Fitbit documentation for more information about the way they are computed.","title":"Fitbit Heart Rate Intraday"},{"location":"features/fitbit-heartrate-intraday/#fitbit-heart-rate-intraday","text":"Sensor parameters description for [FITBIT_HEARTRATE_INTRADAY] : Key Description [CONTAINER] Container where your heart rate intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Heart Rate Intraday"},{"location":"features/fitbit-heartrate-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_heartrate_intraday_raw.csv - data/raw/ { pid } /fitbit_heartrate_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_intraday.csv Parameters description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate intraday data, see table below Features description for [FITBIT_HEARTRATE_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description maxhr beats/mins The maximum heart rate during a time segment. minhr beats/mins The minimum heart rate during a time segment. avghr beats/mins The average heart rate during a time segment. medianhr beats/mins The median of heart rate during a time segment. modehr beats/mins The mode of heart rate during a time segment. stdhr beats/mins The standard deviation of heart rate during a time segment. diffmaxmodehr beats/mins The difference between the maximum and mode heart rate during a time segment. diffminmodehr beats/mins The difference between the mode and minimum heart rate during a time segment. 
entropyhr nats Shannon\u2019s entropy measurement based on heart rate during a time segment. minutesonZONE minutes Number of minutes the user\u2019s heart rate fell within each heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed.","title":"RAPIDS provider"},{"location":"features/fitbit-heartrate-summary/","text":"Fitbit Heart Rate Summary \u00b6 Sensor parameters description for [FITBIT_HEARTRATE_SUMMARY] : Key Description [CONTAINER] Container where your heart rate summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_heartrate_summary_raw.csv - data/raw/ { pid } /fitbit_heartrate_summary_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_summary_features/fitbit_heartrate_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_summary.csv Parameters description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate summary data, see table below Features description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxrestinghr beats/mins The maximum daily resting heart rate during a time segment. minrestinghr beats/mins The minimum daily resting heart rate during a time segment. avgrestinghr beats/mins The average daily resting heart rate during a time segment. medianrestinghr beats/mins The median of daily resting heart rate during a time segment. 
moderestinghr beats/mins The mode of daily resting heart rate during a time segment. stdrestinghr beats/mins The standard deviation of daily resting heart rate during a time segment. diffmaxmoderestinghr beats/mins The difference between the maximum and mode daily resting heart rate during a time segment. diffminmoderestinghr beats/mins The difference between the mode and minimum daily resting heart rate during a time segment. entropyrestinghr nats Shannon\u2019s entropy measurement based on daily resting heart rate during a time segment. sumcaloriesZONE cals The total daily calories burned within heartrate_zone during a time segment. maxcaloriesZONE cals The maximum daily calories burned within heartrate_zone during a time segment. mincaloriesZONE cals The minimum daily calories burned within heartrate_zone during a time segment. avgcaloriesZONE cals The average daily calories burned within heartrate_zone during a time segment. mediancaloriesZONE cals The median of daily calories burned within heartrate_zone during a time segment. stdcaloriesZONE cals The standard deviation of daily calories burned within heartrate_zone during a time segment. entropycaloriesZONE nats Shannon\u2019s entropy measurement based on daily calories burned within heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed. 
Calories\u2019 accuracy depends on the users\u2019 Fitbit profile (weight, height, etc.).","title":"Fitbit Heart Rate Summary"},{"location":"features/fitbit-heartrate-summary/#fitbit-heart-rate-summary","text":"Sensor parameters description for [FITBIT_HEARTRATE_SUMMARY] : Key Description [CONTAINER] Container where your heart rate summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Heart Rate Summary"},{"location":"features/fitbit-heartrate-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_heartrate_summary_raw.csv - data/raw/ { pid } /fitbit_heartrate_summary_with_datetime.csv - data/interim/ { pid } /fitbit_heartrate_summary_features/fitbit_heartrate_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_heartrate_summary.csv Parameters description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_HEARTRATE_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from heart rate summary data, see table below Features description for [FITBIT_HEARTRATE_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxrestinghr beats/mins The maximum daily resting heart rate during a time segment. minrestinghr beats/mins The minimum daily resting heart rate during a time segment. avgrestinghr beats/mins The average daily resting heart rate during a time segment. medianrestinghr beats/mins The median of daily resting heart rate during a time segment. moderestinghr beats/mins The mode of daily resting heart rate during a time segment. stdrestinghr beats/mins The standard deviation of daily resting heart rate during a time segment. 
diffmaxmoderestinghr beats/mins The difference between the maximum and mode daily resting heart rate during a time segment. diffminmoderestinghr beats/mins The difference between the mode and minimum daily resting heart rate during a time segment. entropyrestinghr nats Shannon\u2019s entropy measurement based on daily resting heart rate during a time segment. sumcaloriesZONE cals The total daily calories burned within heartrate_zone during a time segment. maxcaloriesZONE cals The maximum daily calories burned within heartrate_zone during a time segment. mincaloriesZONE cals The minimum daily calories burned within heartrate_zone during a time segment. avgcaloriesZONE cals The average daily calories burned within heartrate_zone during a time segment. mediancaloriesZONE cals The median of daily calories burned within heartrate_zone during a time segment. stdcaloriesZONE cals The standard deviation of daily calories burned within heartrate_zone during a time segment. entropycaloriesZONE nats Shannon\u2019s entropy measurement based on daily calories burned within heartrate_zone during a time segment. Assumptions/Observations There are four heart rate zones (ZONE): outofrange , fatburn , cardio , and peak . Please refer to Fitbit documentation for more information about the way they are computed. Calories\u2019 accuracy depends on the users\u2019 Fitbit profile (weight, height, etc.).","title":"RAPIDS provider"},{"location":"features/fitbit-sleep-intraday/","text":"Fitbit Sleep Intraday \u00b6 Sensor parameters description for [FITBIT_SLEEP_INTRADAY] : Key Description [CONTAINER] Container where your sleep intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: main , nap . [INCLUDE_SLEEP_LATER_THAN] All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day. 
[REFERENCE_TIME] The reference point from which the [ROUTINE] features are to be computed. Chosen from MIDNIGHT and START_OF_THE_SEGMENT , default is MIDNIGHT . If you have multiple time segments per day it might be more informative to set this flag to START_OF_THE_SEGMENT . Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES] : Feature Units Description countepisode [LEVEL][TYPE] episodes Number of [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. sumduration [LEVEL][TYPE] minutes Total duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. maxduration [LEVEL][TYPE] minutes Longest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. minduration [LEVEL][TYPE] minutes Shortest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. avgduration [LEVEL][TYPE] minutes Average duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). 
Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. medianduration [LEVEL][TYPE] minutes Median duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. stdduration [LEVEL][TYPE] minutes Standard deviation duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_LEVELS] : Feature Units Description ratiocount [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem ? (e.g., \\(countepisode[remstages][all] / countepisode[all][all]\\) ) ratioduration [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem ? (e.g., \\(sumduration[remstages][all] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_TYPES] : Feature Units Description ratiocountmain - Ratio between the count of all main episodes (independently of the levels inside) divided by the count of all main and nap episodes. This answers the question: what percentage of all sleep episodes ( main and nap ) were main ? 
We do not provide the ratio for nap because it is complementary. ( \\(countepisode[all][main] / countepisode[all][all]\\) ) ratiodurationmain - Ratio between the duration of all main episodes (independently of the levels inside) divided by the duration of all main and nap episodes. This answers the question: what percentage of all sleep time ( main and nap ) was main ? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[all][main] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_LEVELS] : Feature Units Description ratiocount [TYPE] within [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] during main sleep divided by the count of episodes of a single sleep [LEVEL] during main and nap . This answers the question: are rem episodes more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(countepisode[remstages][main] / countepisode[remstages][all]\\) ) ratioduration [TYPE] within [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] during main sleep divided by the duration of episodes of a single sleep [LEVEL] during main and nap . This answers the question: is rem time more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[remstages][main] / sumduration[remstages][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_TYPES] : Feature Units Description ratiocount [LEVEL] within [TYPE] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem during main / nap sleep time? 
(e.g., \\(countepisode[remstages][main] / countepisode[all][main]\\) ) ratioduration [LEVEL] within [TYPE] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem during main / nap sleep time? (e.g., \\(sumduration[remstages][main] / sumduration[all][main]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE] : Feature Units Description starttimefirstmainsleep minutes Start time (in minutes since REFERENCE_TIME ) of the first main sleep episode after INCLUDE_EPISODES_LATER_THAN . endtimelastmainsleep minutes End time (in minutes since REFERENCE_TIME ) of the last main sleep episode after INCLUDE_EPISODES_LATER_THAN . starttimefirstnap minutes Start time (in minutes since REFERENCE_TIME ) of the first nap episode after INCLUDE_EPISODES_LATER_THAN . endtimelastnap minutes End time (in minutes since REFERENCE_TIME ) of the last nap episode after INCLUDE_EPISODES_LATER_THAN . Assumptions/Observations Deleting values from [SLEEP_LEVELS] or [SLEEP_TYPES] will only change the features you receive from [LEVELS_AND_TYPES] . For example if STAGES only contains [rem, light] you will not receive countepisode[wake|deep][TYPE] or sum, max, min, avg, median, or std duration . These values will not influence RATIOS or ROUTINE features. Any [LEVEL] grouping is done within the elements of each class CLASSIC , STAGES , and UNIFIED . That is, we never combine CLASSIC or STAGES types to compute features when LEVELS_AND_TYPES_COMBINING_ALL is True or when computing RATIOS . 
PRICE provider \u00b6 Available time segments Available for any time segments larger or equal to one day File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_parsed.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the PRICE provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [DAY_TYPE] The features of this provider can be computed using daily averages/standard deviations that were extracted on WEEKEND days only, WEEK days only, or ALL days [GROUP_EPISODES_WITHIN] This parameter contains 2 values: [START_TIME] and [LENGTH] . Only main sleep episodes that intersect or contain the period between [ START_TIME , START_TIME + LENGTH ] are taken into account to compute the features described below. Both [START_TIME] and [LENGTH] are in minutes. 
[START_TIME] is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. [LENGTH] is a number smaller than 1440 (24 hours). Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Feature Units Description avgduration [LEVEL] main [DAY_TYPE] minutes Average duration of daily LEVEL sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgratioduration [LEVEL] withinmain [DAY_TYPE] - Average ratio between daily LEVEL time and in-bed time inferred from main sleep episodes. LEVEL is one of SLEEP_LEVELS (e.g. awake-classic or rem-stages). In-bed time is the total duration of all main sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgstarttimeofepisodemain [DAY_TYPE] minutes Average start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgendtimeofepisodemain [DAY_TYPE] minutes Average end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgmidpointofepisodemain [DAY_TYPE] minutes Average mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdstarttimeofepisodemain [DAY_TYPE] minutes Standard deviation of start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. 
stdendtimeofepisodemain [DAY_TYPE] minutes Standard deviation of end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdmidpointofepisodemain [DAY_TYPE] minutes Standard deviation of mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. socialjetlag minutes Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays. meanssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive start times. meanssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive end times. meanssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive mid times. medianssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive start times. medianssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive end times. medianssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive mid times. Assumptions/Observations These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). 
This is the reason why they are only available on time segments that are longer than 24 hours (we need at least 1 day to get the average). Even though Fitbit provides 2 types of sleep episodes ( main and nap ), only main sleep episodes are considered. How do we assign sleep episodes to specific dates? START_TIME and LENGTH control the dates that sleep episodes belong to. For a pair of [START_TIME] and [LENGTH] , sleep episodes (blue boxes) can only be placed at the following places: Relationship between sleep episodes and the given times ([START_TIME], [LENGTH]) If the end time of a sleep episode is before [START_TIME] , it will belong to the day before its start date (e.g. sleep episode #1). If (1) the start time or the end time of a sleep episode are between (overlap) [START_TIME] and [START_TIME] + [LENGTH] or (2) the start time is before [START_TIME] and the end time is after [START_TIME] + [LENGTH] , it will belong to its start date (e.g. sleep episode #2, #3, #4, #5). If the start time of a sleep episode is after [START_TIME] + [LENGTH] , it will belong to the day after its start date (e.g. sleep episode #6). Only main sleep episodes that intersect or contain the period between [START_TIME] and [START_TIME] + [LENGTH] will be included in the feature computation. If we process the following main sleep episodes: episode start end 1 2021-02-01 12:00 2021-02-01 15:00 2 2021-02-01 21:00 2021-02-02 03:00 3 2021-02-02 05:00 2021-02-02 08:00 4 2021-02-02 11:00 2021-02-02 14:00 5 2021-02-02 19:00 2021-02-03 06:00 And our parameters: [INCLUDE_EPISODES_INTERSECTING][START_TIME] = 1320 (today\u2019s 22:00) [INCLUDE_EPISODES_INTERSECTING][LENGTH] = 720 (tomorrow\u2019s 10:00, or 22:00 + 12 hours) Only sleep episodes 2, 3, and 5 would be considered. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day\u2019s midnight. 
All main sleep episodes are chunked within the requested time segments which need to be at least 24 hours long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example: The daily features extracted on 2021-02-01 will be: starttimeofepisodemain (bedtime) is 21 * 60 (episode 2 start time 2021-02-01 21:00) endtimeofepisodemain (wake time) is 32 * 60 (episode 3 end time 2021-02-02 08:00 + 24) midpointofepisodemain (midpoint sleep) is [(21 * 60) + (32 * 60)] / 2 The daily features extracted on 2021-02-02 will be: starttimeofepisodemain (bedtime) is 19 * 60 (episode 5 start time 2021-02-02 19:00) endtimeofepisodemain (wake time) is 30 * 60 (episode 5 end time 2021-02-03 06:00 + 24) midpointofepisodemain (midpoint sleep) is [(19 * 60) + (30 * 60)] / 2 And avgstarttimeofepisodemain[DAY_TYPE] will be ([21 * 60] + [19 * 60]) / 2","title":"Fitbit Sleep Intraday"},{"location":"features/fitbit-sleep-intraday/#fitbit-sleep-intraday","text":"Sensor parameters description for [FITBIT_SLEEP_INTRADAY] : Key Description [CONTAINER] Container where your sleep intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Sleep Intraday"},{"location":"features/fitbit-sleep-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] : 
Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 2 types of sleep: main , nap . [INCLUDE_SLEEP_LATER_THAN] All resampled sleep rows (bin interval: one minute) that started after this time will be included in the feature computation. It is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. If a segment is longer than one day, this value is for every day. [REFERENCE_TIME] The reference point from which the [ROUTINE] features are to be computed. Chosen from MIDNIGHT and START_OF_THE_SEGMENT , default is MIDNIGHT . If you have multiple time segments per day it might be more informative to set this flag to START_OF_THE_SEGMENT . Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][LEVELS_AND_TYPES] : Feature Units Description countepisode [LEVEL][TYPE] episodes Number of [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). 
Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. sumduration [LEVEL][TYPE] minutes Total duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. maxduration [LEVEL][TYPE] minutes Longest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. minduration [LEVEL][TYPE] minutes Shortest duration of any [LEVEL][TYPE] sleep episode. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. avgduration [LEVEL][TYPE] minutes Average duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. medianduration [LEVEL][TYPE] minutes Median duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. stdduration [LEVEL][TYPE] minutes Standard deviation duration of all [LEVEL][TYPE] sleep episodes. [LEVEL] is one of [SLEEP_LEVELS] (e.g. 
awake-classic or rem-stages) and [TYPE] is one of [SLEEP_TYPES] (e.g. main). Both [LEVEL] and [TYPE] can also be all when LEVELS_AND_TYPES_COMBINING_ALL is True, which ignores the levels and groups by sleep types. Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_LEVELS] : Feature Units Description ratiocount [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem ? (e.g., \\(countepisode[remstages][all] / countepisode[all][all]\\) ) ratioduration [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during both main and nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem ? (e.g., \\(sumduration[remstages][all] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [ACROSS_TYPES] : Feature Units Description ratiocountmain - Ratio between the count of all main episodes (independently of the levels inside) divided by the count of all main and nap episodes. This answers the question: what percentage of all sleep episodes ( main and nap ) were main ? We do not provide the ratio for nap because it is complementary. ( \\(countepisode[all][main] / countepisode[all][all]\\) ) ratiodurationmain - Ratio between the duration of all main episodes (independently of the levels inside) divided by the duration of all main and nap episodes. This answers the question: what percentage of all sleep time ( main and nap ) was main ? We do not provide the ratio for nap because it is complementary. 
( \\(sumduration[all][main] / sumduration[all][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_LEVELS] : Feature Units Description ratiocount [TYPE] within [LEVEL] - Ratio between the count of episodes of a single sleep [LEVEL] during main sleep divided by the count of episodes of a single sleep [LEVEL] during main and nap . This answers the question: are rem episodes more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(countepisode[remstages][main] / countepisode[remstages][all]\\) ) ratioduration [TYPE] within [LEVEL] - Ratio between the duration of episodes of a single sleep [LEVEL] during main sleep divided by the duration of episodes of a single sleep [LEVEL] during main and nap . This answers the question: is rem time more frequent during main than nap sleep? We do not provide the ratio for nap because it is complementary. ( \\(sumduration[remstages][main] / sumduration[remstages][all]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS] RATIOS [WITHIN_TYPES] : Feature Units Description ratiocount [LEVEL] within [TYPE] - Ratio between the count of episodes of a single sleep [LEVEL] and the count of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem episodes were rem during main / nap sleep time? (e.g., \\(countepisode[remstages][main] / countepisode[all][main]\\) ) ratioduration [LEVEL] within [TYPE] - Ratio between the duration of episodes of a single sleep [LEVEL] and the duration of all episodes of all levels during either main or nap sleep types. This answers the question: what percentage of all wake , deep , light , and rem time was rem during main / nap sleep time? 
(e.g., \\(sumduration[remstages][main] / sumduration[all][main]\\) ) Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][RAPIDS][ROUTINE] : Feature Units Description starttimefirstmainsleep minutes Start time (in minutes since REFERENCE_TIME ) of the first main sleep episode after INCLUDE_EPISODES_LATER_THAN . endtimelastmainsleep minutes End time (in minutes since REFERENCE_TIME ) of the last main sleep episode after INCLUDE_EPISODES_LATER_THAN . starttimefirstnap minutes Start time (in minutes since REFERENCE_TIME ) of the first nap episode after INCLUDE_EPISODES_LATER_THAN . endtimelastnap minutes End time (in minutes since REFERENCE_TIME ) of the last nap episode after INCLUDE_EPISODES_LATER_THAN . Assumptions/Observations Deleting values from [SLEEP_LEVELS] or [SLEEP_TYPES] will only change the features you receive from [LEVELS_AND_TYPES] . For example if STAGES only contains [rem, light] you will not receive countepisode[wake|deep][TYPE] or sum, max, min, avg, median, or std duration . These values will not influence RATIOS or ROUTINE features. Any [LEVEL] grouping is done within the elements of each class CLASSIC , STAGES , and UNIFIED . 
That is, we never combine CLASSIC or STAGES types to compute features when LEVELS_AND_TYPES_COMBINING_ALL is True or when computing RATIOS .","title":"RAPIDS provider"},{"location":"features/fitbit-sleep-intraday/#price-provider","text":"Available time segments Available for any time segments larger or equal to one day File Sequence - data/raw/ { pid } /fitbit_sleep_intraday_raw.csv - data/raw/ { pid } /fitbit_sleep_intraday_parsed.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled.csv - data/interim/ { pid } /fitbit_sleep_intraday_episodes_resampled_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_intraday_features/fitbit_sleep_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_intraday.csv Parameters description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_INTRADAY features from the PRICE provider [FEATURES] Features to be computed from sleep intraday data, see table below [SLEEP_LEVELS] Fitbit\u2019s sleep API Version 1 only provides CLASSIC records. However, Version 1.2 provides 2 types of records: CLASSIC and STAGES . STAGES is only available in devices with a heart rate sensor and even those devices will fail to report it if the battery is low or the device is not tight enough. While CLASSIC contains 3 sleep levels ( awake , restless , and asleep ), STAGES contains 4 sleep levels ( wake , deep , light , rem ). To make it consistent, RAPIDS grouped them into 2 UNIFIED sleep levels: awake ( CLASSIC : awake and restless ; STAGES : wake ) and asleep ( CLASSIC : asleep ; STAGES : deep , light , and rem ). [DAY_TYPE] The features of this provider can be computed using daily averages/standard deviations that were extracted on WEEKEND days only, WEEK days only, or ALL days [GROUP_EPISODES_WITHIN] This parameter contains 2 values: [START_TIME] and [LENGTH] . 
Only main sleep episodes that intersect or contain the period between [ START_TIME , START_TIME + LENGTH ] are taken into account to compute the features described below. Both [START_TIME] and [LENGTH] are in minutes. [START_TIME] is a number ranging from 0 (midnight) to 1439 (23:59) which denotes the number of minutes after midnight. [LENGTH] is a number smaller than 1440 (24 hours). Features description for [FITBIT_SLEEP_INTRADAY][PROVIDERS][PRICE] : Feature Units Description avgduration [LEVEL] main [DAY_TYPE] minutes Average duration of daily LEVEL sleep episodes. You can include daily average that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgratioduration [LEVEL] withinmain [DAY_TYPE] - Average ratio between daily LEVEL time and in-bed time inferred from main sleep episodes. LEVEL is one of SLEEP_LEVELS (e.g. awake-classic or rem-stages). In-bed time is the total duration of all main sleep episodes for each day. You can include daily ratios that were computed on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgstarttimeofepisodemain [DAY_TYPE] minutes Average start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgendtimeofepisodemain [DAY_TYPE] minutes Average end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. avgmidpointofepisodemain [DAY_TYPE] minutes Average mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. 
stdstarttimeofepisodemain [DAY_TYPE] minutes Standard deviation of start time of the first main sleep episode of each day in a time segment. You can include daily start times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdendtimeofepisodemain [DAY_TYPE] minutes Standard deviation of end time of the last main sleep episode of each day in a time segment. You can include daily end times from episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. stdmidpointofepisodemain [DAY_TYPE] minutes Standard deviation of mid time between the start of the first main sleep episode and the end of the last main sleep episode of each day in a time segment. You can include episodes detected on weekend days, week days or both depending on the value of the DAY_TYPE flag. socialjetlag minutes Difference in minutes between the avgmidpointofepisodemain (average mid time between bedtime and wake time) of weekends and weekdays. meanssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive start times. meanssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive end times. meanssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the average is computed over the squared differences of each pair of consecutive mid times. medianssdstarttimeofepisodemain minutes squared Same as avgstarttimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive start times. medianssdendtimeofepisodemain minutes squared Same as avgendtimeofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive end times. 
medianssdmidpointofepisodemain minutes squared Same as avgmidpointofepisodemain[DAY_TYPE] but the median is computed over the squared differences of each pair of consecutive mid times. Assumptions/Observations These features are based on descriptive statistics computed across daily values (start/end/mid times of sleep episodes). This is the reason why they are only available on time segments that are longer than 24 hours (we need at least 1 day to get the average). Even though Fitbit provides 2 types of sleep episodes ( main and nap ), only main sleep episodes are considered. How do we assign sleep episodes to specific dates? START_TIME and LENGTH control the dates that sleep episodes belong to. For a pair of [START_TIME] and [LENGTH] , sleep episodes (blue boxes) can only be placed at the following places: Relationship between sleep episodes and the given times ([START_TIME], [LENGTH]) If the end time of a sleep episode is before [START_TIME] , it will belong to the day before its start date (e.g. sleep episode #1). If (1) the start time or the end time of a sleep episode are between (overlap) [START_TIME] and [START_TIME] + [LENGTH] or (2) the start time is before [START_TIME] and the end time is after [START_TIME] + [LENGTH] , it will belong to its start date (e.g. sleep episode #2, #3, #4, #5). If the start time of a sleep episode is after [START_TIME] + [LENGTH] , it will belong to the day after its start date (e.g. sleep episode #6). Only main sleep episodes that intersect or contain the period between [START_TIME] and [START_TIME] + [LENGTH] will be included in the feature computation. 
If we process the following main sleep episodes: episode start end 1 2021-02-01 12:00 2021-02-01 15:00 2 2021-02-01 21:00 2021-02-02 03:00 3 2021-02-02 05:00 2021-02-02 08:00 4 2021-02-02 11:00 2021-02-02 14:00 5 2021-02-02 19:00 2021-02-03 06:00 And our parameters: [INCLUDE_EPISODES_INTERSECTING][START_TIME] = 1320 (today\u2019s 22:00) [INCLUDE_EPISODES_INTERSECTING][LENGTH] = 720 (tomorrow\u2019s 10:00, or 22:00 + 12 hours) Only sleep episodes 2, 3, and 5 would be considered. Time related features represent the number of minutes between the start/end/midpoint of sleep episodes and the assigned day\u2019s midnight. All main sleep episodes are chunked within the requested time segments which need to be at least 24 hours long (1, 2, 3, 7 days, etc.). Then, daily features will be extracted and averaged across the length of the time segment, for example: The daily features extracted on 2021-02-01 will be: starttimeofepisodemain (bedtime) is 21 * 60 (episode 2 start time 2021-02-01 21:00) endtimeofepisodemain (wake time) is 32 * 60 (episode 3 end time 2021-02-02 08:00 + 24) midpointofepisodemain (midpoint sleep) is [(21 * 60) + (32 * 60)] / 2 The daily features extracted on 2021-02-02 will be: starttimeofepisodemain (bedtime) is 19 * 60 (episode 5 start time 2021-02-02 19:00) endtimeofepisodemain (wake time) is 30 * 60 (episode 5 end time 2021-02-03 06:00 + 24) midpointofepisodemain (midpoint sleep) is [(19 * 60) + (30 * 60)] / 2 And avgstarttimeofepisodemain[DAY_TYPE] will be ([21 * 60] + [19 * 60]) / 2","title":"PRICE provider"},{"location":"features/fitbit-sleep-summary/","text":"Fitbit Sleep Summary \u00b6 Sensor parameters description for [FITBIT_SLEEP_SUMMARY] : Key Description [CONTAINER] Container where your sleep summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. 
Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_sleep_summary_raw.csv - data/raw/ { pid } /fitbit_sleep_summary_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_summary_features/fitbit_sleep_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_summary.csv Parameters description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_SUMMARY features from the RAPIDS provider [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: main , nap , all . [FEATURES] Features to be computed from sleep summary data, see table below Features description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description countepisodeTYPE episodes Number of sleep episodes for a certain sleep type during a time segment. avgefficiencyTYPE scores Average sleep efficiency for a certain sleep type during a time segment. sumdurationafterwakeupTYPE minutes Total duration the user stayed in bed after waking up for a certain sleep type during a time segment. sumdurationasleepTYPE minutes Total sleep duration for a certain sleep type during a time segment. sumdurationawakeTYPE minutes Total duration the user stayed awake but still in bed for a certain sleep type during a time segment. sumdurationtofallasleepTYPE minutes Total duration the user spent to fall asleep for a certain sleep type during a time segment. sumdurationinbedTYPE minutes Total duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. avgdurationafterwakeupTYPE minutes Average duration the user stayed in bed after waking up for a certain sleep type during a time segment. avgdurationasleepTYPE minutes Average sleep duration for a certain sleep type during a time segment. 
avgdurationawakeTYPE minutes Average duration the user stayed awake but still in bed for a certain sleep type during a time segment. avgdurationtofallasleepTYPE minutes Average duration the user spent to fall asleep for a certain sleep type during a time segment. avgdurationinbedTYPE minutes Average duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. Assumptions/Observations There are three sleep types (TYPE): main , nap , all . The all type contains both main sleep and naps. There are two versions of Fitbit\u2019s sleep API ( version 1 and version 1.2 ), and each provides raw sleep data in a different format: Count & duration summaries . v1 contains count_awake , duration_awake , count_awakenings , count_restless , and duration_restless fields for every sleep record but v1.2 does not. API columns . Features are computed based on the values provided by Fitbit\u2019s API: efficiency , minutes_after_wakeup , minutes_asleep , minutes_awake , minutes_to_fall_asleep , minutes_in_bed , is_main_sleep and type .","title":"Fitbit Sleep Summary"},{"location":"features/fitbit-sleep-summary/#fitbit-sleep-summary","text":"Sensor parameters description for [FITBIT_SLEEP_SUMMARY] : Key Description [CONTAINER] Container where your sleep summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Sleep Summary"},{"location":"features/fitbit-sleep-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. 
Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_sleep_summary_raw.csv - data/raw/ { pid } /fitbit_sleep_summary_with_datetime.csv - data/interim/ { pid } /fitbit_sleep_summary_features/fitbit_sleep_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_sleep_summary.csv Parameters description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_SLEEP_SUMMARY features from the RAPIDS provider [SLEEP_TYPES] Types of sleep to be included in the feature extraction computation. Fitbit provides 3 types of sleep: main , nap , all . [FEATURES] Features to be computed from sleep summary data, see table below Features description for [FITBIT_SLEEP_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description countepisodeTYPE episodes Number of sleep episodes for a certain sleep type during a time segment. avgefficiencyTYPE scores Average sleep efficiency for a certain sleep type during a time segment. sumdurationafterwakeupTYPE minutes Total duration the user stayed in bed after waking up for a certain sleep type during a time segment. sumdurationasleepTYPE minutes Total sleep duration for a certain sleep type during a time segment. sumdurationawakeTYPE minutes Total duration the user stayed awake but still in bed for a certain sleep type during a time segment. sumdurationtofallasleepTYPE minutes Total duration the user spent to fall asleep for a certain sleep type during a time segment. sumdurationinbedTYPE minutes Total duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. avgdurationafterwakeupTYPE minutes Average duration the user stayed in bed after waking up for a certain sleep type during a time segment. avgdurationasleepTYPE minutes Average sleep duration for a certain sleep type during a time segment. 
avgdurationawakeTYPE minutes Average duration the user stayed awake but still in bed for a certain sleep type during a time segment. avgdurationtofallasleepTYPE minutes Average duration the user spent to fall asleep for a certain sleep type during a time segment. avgdurationinbedTYPE minutes Average duration the user stayed in bed (sumdurationtofallasleep + sumdurationawake + sumdurationasleep + sumdurationafterwakeup) for a certain sleep type during a time segment. Assumptions/Observations There are three sleep types (TYPE): main , nap , all . The all type contains both main sleep and naps. There are two versions of Fitbit\u2019s sleep API ( version 1 and version 1.2 ), and each provides raw sleep data in a different format: Count & duration summaries . v1 contains count_awake , duration_awake , count_awakenings , count_restless , and duration_restless fields for every sleep record but v1.2 does not. API columns . Features are computed based on the values provided by Fitbit\u2019s API: efficiency , minutes_after_wakeup , minutes_asleep , minutes_awake , minutes_to_fall_asleep , minutes_in_bed , is_main_sleep and type .","title":"RAPIDS provider"},{"location":"features/fitbit-steps-intraday/","text":"Fitbit Steps Intraday \u00b6 Sensor parameters description for [FITBIT_STEPS_INTRADAY] : Key Description [CONTAINER] Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. 
RAPIDS provider \u00b6 Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_steps_intraday_raw.csv - data/raw/ { pid } /fitbit_steps_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_steps_intraday_features/fitbit_steps_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_intraday.csv Parameters description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from steps intraday data, see table below [THRESHOLD_ACTIVE_BOUT] Every minute with Fitbit steps data will be labelled as sedentary if its step count is below this threshold, otherwise, active . [INCLUDE_ZERO_STEP_ROWS] Whether or not to include time segments with a 0 step count during the whole day. Features description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description sumsteps steps The total step count during a time segment. maxsteps steps The maximum step count during a time segment. minsteps steps The minimum step count during a time segment. avgsteps steps The average step count during a time segment. stdsteps steps The standard deviation of step count during a time segment. countepisodesedentarybout bouts Number of sedentary bouts during a time segment. sumdurationsedentarybout minutes Total duration of all sedentary bouts during a time segment. maxdurationsedentarybout minutes The maximum duration of any sedentary bout during a time segment. mindurationsedentarybout minutes The minimum duration of any sedentary bout during a time segment. avgdurationsedentarybout minutes The average duration of sedentary bouts during a time segment. stddurationsedentarybout minutes The standard deviation of the duration of sedentary bouts during a time segment. countepisodeactivebout bouts Number of active bouts during a time segment. 
sumdurationactivebout minutes Total duration of all active bouts during a time segment. maxdurationactivebout minutes The maximum duration of any active bout during a time segment. mindurationactivebout minutes The minimum duration of any active bout during a time segment. avgdurationactivebout minutes The average duration of active bouts during a time segment. stddurationactivebout minutes The standard deviation of the duration of active bouts during a time segment. Assumptions/Observations Active and sedentary bouts . If the step count per minute is smaller than THRESHOLD_ACTIVE_BOUT (default value is 10), that minute is labelled as sedentary, otherwise, is labelled as active. Active and sedentary bouts are periods of consecutive minutes labelled as active or sedentary .","title":"Fitbit Steps Intraday"},{"location":"features/fitbit-steps-intraday/#fitbit-steps-intraday","text":"Sensor parameters description for [FITBIT_STEPS_INTRADAY] : Key Description [CONTAINER] Container where your steps intraday data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Steps Intraday"},{"location":"features/fitbit-steps-intraday/#rapids-provider","text":"Available time segments Available for all time segments File Sequence - data/raw/ { pid } /fitbit_steps_intraday_raw.csv - data/raw/ { pid } /fitbit_steps_intraday_with_datetime.csv - data/interim/ { pid } /fitbit_steps_intraday_features/fitbit_steps_intraday_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_intraday.csv Parameters description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_INTRADAY features from the RAPIDS provider [FEATURES] Features to be computed from steps intraday data, see table below [THRESHOLD_ACTIVE_BOUT] Every minute with Fitbit steps data will be labelled as sedentary if its step count is below this threshold, otherwise, active . 
[INCLUDE_ZERO_STEP_ROWS] Whether or not to include time segments with a 0 step count during the whole day. Features description for [FITBIT_STEPS_INTRADAY][PROVIDERS][RAPIDS] : Feature Units Description sumsteps steps The total step count during a time segment. maxsteps steps The maximum step count during a time segment. minsteps steps The minimum step count during a time segment. avgsteps steps The average step count during a time segment. stdsteps steps The standard deviation of step count during a time segment. countepisodesedentarybout bouts Number of sedentary bouts during a time segment. sumdurationsedentarybout minutes Total duration of all sedentary bouts during a time segment. maxdurationsedentarybout minutes The maximum duration of any sedentary bout during a time segment. mindurationsedentarybout minutes The minimum duration of any sedentary bout during a time segment. avgdurationsedentarybout minutes The average duration of sedentary bouts during a time segment. stddurationsedentarybout minutes The standard deviation of the duration of sedentary bouts during a time segment. countepisodeactivebout bouts Number of active bouts during a time segment. sumdurationactivebout minutes Total duration of all active bouts during a time segment. maxdurationactivebout minutes The maximum duration of any active bout during a time segment. mindurationactivebout minutes The minimum duration of any active bout during a time segment. avgdurationactivebout minutes The average duration of active bouts during a time segment. stddurationactivebout minutes The standard deviation of the duration of active bouts during a time segment. Assumptions/Observations Active and sedentary bouts . If the step count per minute is smaller than THRESHOLD_ACTIVE_BOUT (default value is 10), that minute is labelled as sedentary, otherwise, is labelled as active. 
Active and sedentary bouts are periods of consecutive minutes labelled as active or sedentary .","title":"RAPIDS provider"},{"location":"features/fitbit-steps-summary/","text":"Fitbit Steps Summary \u00b6 Sensor parameters description for [FITBIT_STEPS_SUMMARY] : Key Description [CONTAINER] Container where your steps summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc. RAPIDS provider \u00b6 Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_steps_summary_raw.csv - data/raw/ { pid } /fitbit_steps_summary_with_datetime.csv - data/interim/ { pid } /fitbit_steps_summary_features/fitbit_steps_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_summary.csv Parameters description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from steps summary data, see table below Features description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxsumsteps steps The maximum daily step count during a time segment. minsumsteps steps The minimum daily step count during a time segment. avgsumsteps steps The average daily step count during a time segment. mediansumsteps steps The median of daily step count during a time segment. stdsumsteps steps The standard deviation of daily step count during a time segment. 
Assumptions/Observations NA","title":"Fitbit Steps Summary"},{"location":"features/fitbit-steps-summary/#fitbit-steps-summary","text":"Sensor parameters description for [FITBIT_STEPS_SUMMARY] : Key Description [CONTAINER] Container where your steps summary data is stored, depending on the data stream you are using this can be a database table, a CSV file, etc.","title":"Fitbit Steps Summary"},{"location":"features/fitbit-steps-summary/#rapids-provider","text":"Available time segments Only available for segments that span 1 or more complete days (e.g. Jan 1 st 00:00 to Jan 3 rd 23:59) File Sequence - data/raw/ { pid } /fitbit_steps_summary_raw.csv - data/raw/ { pid } /fitbit_steps_summary_with_datetime.csv - data/interim/ { pid } /fitbit_steps_summary_features/fitbit_steps_summary_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /fitbit_steps_summary.csv Parameters description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract FITBIT_STEPS_SUMMARY features from the RAPIDS provider [FEATURES] Features to be computed from steps summary data, see table below Features description for [FITBIT_STEPS_SUMMARY][PROVIDERS][RAPIDS] : Feature Units Description maxsumsteps steps The maximum daily step count during a time segment. minsumsteps steps The minimum daily step count during a time segment. avgsumsteps steps The average daily step count during a time segment. mediansumsteps steps The median of daily step count during a time segment. stdsumsteps steps The standard deviation of daily step count during a time segment. Assumptions/Observations NA","title":"RAPIDS provider"},{"location":"features/phone-accelerometer/","text":"Phone Accelerometer \u00b6 Sensor parameters description for [PHONE_ACCELEROMETER] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the accelerometer data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. PANDA provider \u00b6 These features are based on the work by Panda et al . 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the PANDA provider [FEATURES] Features to be computed for exertional and non-exertional activity episodes, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Feature Units Description sumduration minutes Total duration of all exertional or non-exertional activity episodes. maxduration minutes Longest duration of any exertional or non-exertional activity episode. minduration minutes Shortest duration of any exertional or non-exertional activity episode. avgduration minutes Average duration of any exertional or non-exertional activity episode. medianduration minutes Median duration of any exertional or non-exertional activity episode. stdduration minutes Standard deviation of the duration of all exertional or non-exertional activity episodes. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. See Panda et al for a definition of exertional and non-exertional activity episodes","title":"Phone Accelerometer"},{"location":"features/phone-accelerometer/#phone-accelerometer","text":"Sensor parameters description for [PHONE_ACCELEROMETER] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the accelerometer data is stored","title":"Phone Accelerometer"},{"location":"features/phone-accelerometer/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][RAPIDS] : Feature Units Description maxmagnitude m/s 2 The maximum magnitude of acceleration ( \\(\\|acceleration\\| = \\sqrt{x^2 + y^2 + z^2}\\) ). minmagnitude m/s 2 The minimum magnitude of acceleration. avgmagnitude m/s 2 The average magnitude of acceleration. medianmagnitude m/s 2 The median magnitude of acceleration. stdmagnitude m/s 2 The standard deviation of acceleration. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem.","title":"RAPIDS provider"},{"location":"features/phone-accelerometer/#panda-provider","text":"These features are based on the work by Panda et al . 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_accelerometer_raw.csv - data/raw/ { pid } /phone_accelerometer_with_datetime.csv - data/interim/ { pid } /phone_accelerometer_features/phone_accelerometer_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_accelerometer.csv Parameters description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Key Description [COMPUTE] Set to True to extract PHONE_ACCELEROMETER features from the PANDA provider [FEATURES] Features to be computed for exertional and non-exertional activity episodes, see table below Features description for [PHONE_ACCELEROMETER][PROVIDERS][PANDA] : Feature Units Description sumduration minutes Total duration of all exertional or non-exertional activity episodes. maxduration minutes Longest duration of any exertional or non-exertional activity episode. minduration minutes Shortest duration of any exertional or non-exertional activity episode. avgduration minutes Average duration of any exertional or non-exertional activity episode. medianduration minutes Median duration of any exertional or non-exertional activity episode. stdduration minutes Standard deviation of the duration of all exertional or non-exertional activity episodes. Assumptions/Observations Analyzing accelerometer data is a memory intensive task. If RAPIDS crashes, it is likely because the accelerometer dataset for a participant is too big to fit in memory. We are considering different alternatives to overcome this problem. See Panda et al for a definition of exertional and non-exertional activity episodes","title":"PANDA provider"},{"location":"features/phone-activity-recognition/","text":"Phone Activity Recognition \u00b6 Sensor parameters description for [PHONE_ACTIVITY_RECOGNITION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) 
where the activity data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where the activity data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same activity episode RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_activity_recognition_raw.csv - data/raw/ { pid } /phone_activity_recognition_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_episodes.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_features/phone_activity_recognition_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_activity_recognition.csv Parameters description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACTIVITY_RECOGNITION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [ACTIVITY_CLASSES][STATIONARY] An array of the activity labels to be considered in the STATIONARY category choose any of still , tilting [ACTIVITY_CLASSES][MOBILE] An array of the activity labels to be considered in the MOBILE category choose any of on_foot , walking , running , on_bicycle [ACTIVITY_CLASSES][VEHICLE] An array of the activity labels to be considered in the VEHICLE category choose any of in_vehicle Features description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of episodes. 
mostcommonactivity activity type The most common activity type (e.g. still , on_foot , etc.). If there is a tie, the first one is chosen. countuniqueactivities activity type Number of unique activities. durationstationary minutes The total duration of [ACTIVITY_CLASSES][STATIONARY] episodes durationmobile minutes The total duration of [ACTIVITY_CLASSES][MOBILE] episodes of on foot, running, and on bicycle activities durationvehicle minutes The total duration of [ACTIVITY_CLASSES][VEHICLE] episodes of on vehicle activity Assumptions/Observations iOS Activity Recognition names and types are unified with Android labels: iOS Activity Name Android Activity Name Android Activity Type walking walking 7 running running 8 cycling on_bicycle 1 automotive in_vehicle 0 stationary still 3 unknown unknown 4 In AWARE, Activity Recognition data for Android and iOS are stored in two different database tables, RAPIDS automatically infers what platform each participant belongs to based on their participant file .","title":"Phone Activity Recognition"},{"location":"features/phone-activity-recognition/#phone-activity-recognition","text":"Sensor parameters description for [PHONE_ACTIVITY_RECOGNITION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the activity data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where the activity data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same activity episode","title":"Phone Activity Recognition"},{"location":"features/phone-activity-recognition/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_activity_recognition_raw.csv - data/raw/ { pid } /phone_activity_recognition_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_episodes.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled.csv - data/interim/ { pid } /phone_activity_recognition_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_activity_recognition_features/phone_activity_recognition_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_activity_recognition.csv Parameters description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_ACTIVITY_RECOGNITION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [ACTIVITY_CLASSES][STATIONARY] An array of the activity labels to be considered in the STATIONARY category choose any of still , tilting [ACTIVITY_CLASSES][MOBILE] An array of the activity labels to be considered in the MOBILE category choose any of on_foot , walking , running , on_bicycle [ACTIVITY_CLASSES][VEHICLE] An array of the activity labels to be considered in the VEHICLE category choose any of in_vehicle Features description for [PHONE_ACTIVITY_RECOGNITION][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of episodes. mostcommonactivity activity type The most common activity type (e.g. still , on_foot , etc.). If there is a tie, the first one is chosen. 
countuniqueactivities activity type Number of unique activities. durationstationary minutes The total duration of [ACTIVITY_CLASSES][STATIONARY] episodes durationmobile minutes The total duration of [ACTIVITY_CLASSES][MOBILE] episodes of on foot, running, and on bicycle activities durationvehicle minutes The total duration of [ACTIVITY_CLASSES][VEHICLE] episodes of on vehicle activity Assumptions/Observations iOS Activity Recognition names and types are unified with Android labels: iOS Activity Name Android Activity Name Android Activity Type walking walking 7 running running 8 cycling on_bicycle 1 automotive in_vehicle 0 stationary still 3 unknown unknown 4 In AWARE, Activity Recognition data for Android and iOS are stored in two different database tables, RAPIDS automatically infers what platform each participant belongs to based on their participant file .","title":"RAPIDS provider"},{"location":"features/phone-applications-crashes/","text":"Phone Applications Crashes \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_CRASHES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications crashes data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . 
If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_CRASHES ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Crashes"},{"location":"features/phone-applications-crashes/#phone-applications-crashes","text":"Sensor parameters description for [PHONE_APPLICATIONS_CRASHES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications crashes data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . 
If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_CRASHES ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Crashes"},{"location":"features/phone-applications-foreground/","text":"Phone Applications Foreground \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_FOREGROUND] (these parameters are used by the only provider available at the moment, RAPIDS): Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications foreground data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If [GOOGLE] , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) RAPIDS provider \u00b6 The app category (genre) catalogue used in these features was originally created by Stachl et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_applications_foreground_raw.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime_with_categories.csv - data/interim/ { pid } /phone_applications_foreground_features/phone_applications_foreground_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_applications_foreground.csv Parameters description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_APPLICATIONS_FOREGROUND features from the RAPIDS provider [FEATURES] Features to be computed, see table below [SINGLE_CATEGORIES] An array of app categories to be included in the feature extraction computation. The special keyword all represents a category with all the apps from each participant. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [MULTIPLE_CATEGORIES] An array of collections representing meta-categories (a group of categories). The key of each element is the name of the meta-category and the value is an array of member app categories. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [SINGLE_APPS] An array of apps to be included in the feature extraction computation. Use their package name (e.g. com.google.android.youtube ) or the reserved keyword top1global (the most used app by a participant over the whole monitoring study) [EXCLUDED_CATEGORIES] An array of app categories to be excluded from the feature extraction computation. 
By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [EXCLUDED_APPS] An array of apps to be excluded from the feature extraction computation. Use their package name, for example: com.google.android.youtube Features description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Feature Units Description count apps Number of times a single app or apps within a category were used (i.e. they were brought to the foreground either by tapping their icon or switching to it from another app) timeoffirstuse minutes The time in minutes between 12:00am (midnight) and the first use of a single app or apps within a category during a time_segment timeoflastuse minutes The time in minutes between 12:00am (midnight) and the last use of a single app or apps within a category during a time_segment frequencyentropy nats The entropy of the used apps within a category during a time_segment (each app is seen as a unique event, the more apps were used, the higher the entropy). This is especially relevant when computed over all apps. Entropy cannot be obtained for a single app Assumptions/Observations Features can be computed by app, by apps grouped under a single category (genre) and by multiple categories grouped together (meta-categories). For example, we can get features for Facebook (single app), for Social Network apps (a category including Facebook and other social media apps) or for Social (a meta-category formed by Social Network and Social Media Tools categories). Apps installed by default like YouTube are considered systems apps on some phones. We do an exact match to exclude apps where \u201cgenre\u201d == EXCLUDED_CATEGORIES or \u201cpackage_name\u201d == EXCLUDED_APPS . We provide three ways of classifying an app within a category (genre): a) by automatically scraping its official category from the Google Play Store, b) by using the catalogue created by Stachl et al. 
which we provide in RAPIDS ( data/external/stachl_application_genre_catalogue.csv ), or c) by manually creating a personalized catalogue. You can choose a, b or c by modifying [APPLICATION_CATEGORIES] keys and values (see the Sensor parameters description table above).","title":"Phone Applications Foreground"},{"location":"features/phone-applications-foreground/#phone-applications-foreground","text":"Sensor parameters description for [PHONE_APPLICATIONS_FOREGROUND] (these parameters are used by the only provider available at the moment, RAPIDS): Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications foreground data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If GOOGLE , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored)","title":"Phone Applications Foreground"},{"location":"features/phone-applications-foreground/#rapids-provider","text":"The app category (genre) catalogue used in these features was originally created by Stachl et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_applications_foreground_raw.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime.csv - data/raw/ { pid } /phone_applications_foreground_with_datetime_with_categories.csv - data/interim/ { pid } /phone_applications_foreground_features/phone_applications_foreground_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_applications_foreground.csv Parameters description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_APPLICATIONS_FOREGROUND features from the RAPIDS provider [FEATURES] Features to be computed, see table below [SINGLE_CATEGORIES] An array of app categories to be included in the feature extraction computation. The special keyword all represents a category with all the apps from each participant. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [MULTIPLE_CATEGORIES] An array of collections representing meta-categories (a group of categories). The key of each element is the name of the meta-category and the value is an array of member app categories. By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [SINGLE_APPS] An array of apps to be included in the feature extraction computation. Use their package name (e.g. com.google.android.youtube ) or the reserved keyword top1global (the most used app by a participant over the whole monitoring study) [EXCLUDED_CATEGORIES] An array of app categories to be excluded from the feature extraction computation. 
By default we use the category catalogue pointed by [APPLICATION_CATEGORIES][CATALOGUE_FILE] (see the Sensor parameters description table above) [EXCLUDED_APPS] An array of apps to be excluded from the feature extraction computation. Use their package name, for example: com.google.android.youtube Features description for [PHONE_APPLICATIONS_FOREGROUND][PROVIDERS][RAPIDS] : Feature Units Description count apps Number of times a single app or apps within a category were used (i.e. they were brought to the foreground either by tapping their icon or switching to it from another app) timeoffirstuse minutes The time in minutes between 12:00am (midnight) and the first use of a single app or apps within a category during a time_segment timeoflastuse minutes The time in minutes between 12:00am (midnight) and the last use of a single app or apps within a category during a time_segment frequencyentropy nats The entropy of the used apps within a category during a time_segment (each app is seen as a unique event, the more apps were used, the higher the entropy). This is especially relevant when computed over all apps. Entropy cannot be obtained for a single app Assumptions/Observations Features can be computed by app, by apps grouped under a single category (genre) and by multiple categories grouped together (meta-categories). For example, we can get features for Facebook (single app), for Social Network apps (a category including Facebook and other social media apps) or for Social (a meta-category formed by Social Network and Social Media Tools categories). Apps installed by default like YouTube are considered system apps on some phones. We do an exact match to exclude apps where \u201cgenre\u201d == EXCLUDED_CATEGORIES or \u201cpackage_name\u201d == EXCLUDED_APPS . We provide three ways of classifying an app within a category (genre): a) by automatically scraping its official category from the Google Play Store, b) by using the catalogue created by Stachl et al. 
which we provide in RAPIDS ( data/external/stachl_application_genre_catalogue.csv ), or c) by manually creating a personalized catalogue. You can choose a, b or c by modifying [APPLICATION_CATEGORIES] keys and values (see the Sensor parameters description table above).","title":"RAPIDS provider"},{"location":"features/phone-applications-notifications/","text":"Phone Applications Notifications \u00b6 Sensor parameters description for [PHONE_APPLICATIONS_NOTIFICATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the applications notifications data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If GOOGLE , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_NOTIFICATIONS ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Notifications"},{"location":"features/phone-applications-notifications/#phone-applications-notifications","text":"Sensor parameters description for [PHONE_APPLICATIONS_NOTIFICATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the applications notifications data is stored [APPLICATION_CATEGORIES][CATALOGUE_SOURCE] FILE or GOOGLE . If FILE , app categories (genres) are read from [CATALOGUE_FILE] . If GOOGLE , app categories (genres) are scraped from the Play Store [APPLICATION_CATEGORIES][CATALOGUE_FILE] CSV file with a package_name and genre column. By default we provide the catalogue created by Stachl et al in data/external/stachl_application_genre_catalogue.csv [APPLICATION_CATEGORIES][UPDATE_CATALOGUE_FILE] if [CATALOGUE_SOURCE] is equal to FILE , this flag signals whether or not to update [CATALOGUE_FILE] , if [CATALOGUE_SOURCE] is equal to GOOGLE all scraped genres will be saved to [CATALOGUE_FILE] [APPLICATION_CATEGORIES][SCRAPE_MISSING_CATEGORIES] This flag signals whether or not to scrape categories (genres) missing from the [CATALOGUE_FILE] . If [CATALOGUE_SOURCE] is equal to GOOGLE , all genres are scraped anyway (this flag is ignored) Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_APPLICATIONS_NOTIFICATIONS ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Applications Notifications"},{"location":"features/phone-battery/","text":"Phone Battery \u00b6 Sensor parameters description for [PHONE_BATTERY] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the battery data is stored [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same battery charge or discharge episode RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_battery_raw.csv - data/interim/ { pid } /phone_battery_episodes.csv - data/interim/ { pid } /phone_battery_episodes_resampled.csv - data/interim/ { pid } /phone_battery_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_battery_features/phone_battery_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_battery.csv Parameters description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BATTERY features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Feature Units Description countdischarge episodes Number of discharging episodes. sumdurationdischarge minutes The total duration of all discharging episodes. countcharge episodes Number of battery charging episodes. sumdurationcharge minutes The total duration of all charging episodes. avgconsumptionrate episodes/minutes The average of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration maxconsumptionrate episodes/minutes The highest of all episodes\u2019 consumption rates. 
An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration Assumptions/Observations We convert battery data collected with iOS client v1 (autodetected because battery status 4 does not exist) to match Android battery format: we swap status 3 for 5 and 1 for 3 We group battery data into discharge or charge episodes considering any contiguous rows with consecutive reductions or increases of the battery level if they are logged within [EPISODE_THRESHOLD_BETWEEN_ROWS] minutes from each other.","title":"Phone Battery"},{"location":"features/phone-battery/#phone-battery","text":"Sensor parameters description for [PHONE_BATTERY] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the battery data is stored [EPISODE_THRESHOLD_BETWEEN_ROWS] Difference in minutes between any two rows for them to be considered part of the same battery charge or discharge episode","title":"Phone Battery"},{"location":"features/phone-battery/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_battery_raw.csv - data/interim/ { pid } /phone_battery_episodes.csv - data/interim/ { pid } /phone_battery_episodes_resampled.csv - data/interim/ { pid } /phone_battery_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_battery_features/phone_battery_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_battery.csv Parameters description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BATTERY features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BATTERY][PROVIDERS][RAPIDS] : Feature Units Description countdischarge episodes Number of discharging episodes. sumdurationdischarge minutes The total duration of all discharging episodes. 
countcharge episodes Number of battery charging episodes. sumdurationcharge minutes The total duration of all charging episodes. avgconsumptionrate episodes/minutes The average of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration maxconsumptionrate episodes/minutes The highest of all episodes\u2019 consumption rates. An episode\u2019s consumption rate is defined as the ratio between its battery delta and duration Assumptions/Observations We convert battery data collected with iOS client v1 (autodetected because battery status 4 does not exist) to match Android battery format: we swap status 3 for 5 and 1 for 3 We group battery data into discharge or charge episodes considering any contiguous rows with consecutive reductions or increases of the battery level if they are logged within [EPISODE_THRESHOLD_BETWEEN_ROWS] minutes from each other.","title":"RAPIDS provider"},{"location":"features/phone-bluetooth/","text":"Phone Bluetooth \u00b6 Sensor parameters description for [PHONE_BLUETOOTH] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the bluetooth data is stored RAPIDS provider \u00b6 Warning The features of this provider are deprecated in favor of DORYAB provider (see below). 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned devices during a time segment, a device can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique devices during a time segment as identified by their hardware ( bt_address ) address countscansmostuniquedevice scans Number of scans of the most sensed device within each time segment instance Assumptions/Observations From v0.2.0 countscans , uniquedevices , countscansmostuniquedevice were deprecated because they overlap with the respective features for ALL devices of the PHONE_BLUETOOTH DORYAB provider DORYAB provider \u00b6 This provider is adapted from the work by Doryab et al . Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the DORYAB provider [FEATURES] Features to be computed, see table below. 
These features are computed for three device categories: all devices, own devices and other devices. Features description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Feature Units Description countscans scans Number of scans (rows) from the devices sensed during a time segment instance. The more scans a bluetooth device has the longer it remained within range of the participant\u2019s phone uniquedevices devices Number of unique bluetooth devices sensed during a time segment instance as identified by their hardware addresses ( bt_address ) meanscans scans Mean of the scans of every sensed device within each time segment instance stdscans scans Standard deviation of the scans of every sensed device within each time segment instance countscans most frequentdevice within segments scans Number of scans of the most sensed device within each time segment instance countscans least frequentdevice within segments scans Number of scans of the least sensed device within each time segment instance countscans most frequentdevice across segments scans Number of scans of the most sensed device across time segment instances of the same type countscans least frequentdevice across segments scans Number of scans of the least sensed device across time segment instances of the same type per device countscans most frequentdevice acrossdataset scans Number of scans of the most sensed device across the entire dataset of every participant countscans least frequentdevice acrossdataset scans Number of scans of the least sensed device across the entire dataset of every participant Assumptions/Observations Devices are classified as belonging to the participant ( own ) or to other people ( others ) using k-means based on the number of times and the number of days each device was detected across each participant\u2019s dataset. See Doryab et al for more details. If ownership cannot be computed because all devices were detected on only one day, they are all considered as other . 
Thus all and other features will be equal. The likelihood of this scenario decreases the more days of data you have. The most and least frequent devices will be the same across time segment instances and across the entire dataset when every time segment instance covers every hour of a dataset. For example, daily segments (00:00 to 23:59) fall in this category but morning segments (06:00am to 11:59am) or periodic 30-minute segments don\u2019t. Example Simplified raw bluetooth data The following is a simplified example with bluetooth data from three days and two time segments: morning and afternoon. There are two own devices: 55C836F5-487E-405F-8E28-21DBD40FA4FF detected seven times across two days and 499A1EAF-DDF1-4657-986C-EA5032104448 detected eight times on a single day. local_date segment bt_address own_device 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-29 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 afternoon 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-30 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2017-05-07 morning 5C5A9C41-2F68-4CEB-96D0-77DE3729B729 0 2017-05-07 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2017-05-07 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 morning 6C444841-FE64-4375-BC3F-FA410CDC0AC7 0 2017-05-07 morning 4DC7A22D-9F1F-4DEF-8576-086910AABCB5 0 2017-05-07 afternoon 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 
2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 The most and least frequent OTHER devices ( own_device == 0 ) during morning segments The most and least frequent ALL | OWN | OTHER devices are computed within each time segment instance, across time segment instances of the same type and across the entire dataset of each person. These are the most and least frequent devices for OTHER devices during morning segments. most frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) least frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) most frequent device across 2016-11-30 morning: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent device across 2016-11-30 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) least frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent across morning segments: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent across morning segments: '6C444841-FE64-4375-BC3F-FA410CDC0AC7' (when tied, the first occurrence is chosen) most frequent across dataset: '499A1EAF-DDF1-4657-986C-EA5032104448' (only taking into account \"morning\" segments) least frequent across dataset: '4DC7A22D-9F1F-4DEF-8576-086910AABCB5' (when tied, the first occurrence is chosen) Bluetooth features for OTHER 
devices and morning segments For brevity we only show the following features for morning segments: OTHER : DEVICES : [ \"countscans\" , \"uniquedevices\" , \"meanscans\" , \"stdscans\" ] SCANS_MOST_FREQUENT_DEVICE : [ \"withinsegments\" , \"acrosssegments\" , \"acrossdataset\" ] Note that countscansmostfrequentdeviceacrossdatasetothers is all 0 s because 499A1EAF-DDF1-4657-986C-EA5032104448 is excluded from the count as it is labelled as an own device (not other ). local_segment countscansothers uniquedevicesothers meanscansothers stdscansothers countscansmostfrequentdevicewithinsegmentsothers countscansmostfrequentdeviceacrosssegmentsothers countscansmostfrequentdeviceacrossdatasetothers 2016-11-29-morning 1 1 1.000000 NaN 1 0.0 0.0 2016-11-30-morning 4 3 1.333333 0.57735 2 2.0 2.0 2017-05-07-morning 5 5 1.000000 0.00000 1 1.0 1.0","title":"Phone Bluetooth"},{"location":"features/phone-bluetooth/#phone-bluetooth","text":"Sensor parameters description for [PHONE_BLUETOOTH] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the bluetooth data is stored","title":"Phone Bluetooth"},{"location":"features/phone-bluetooth/#rapids-provider","text":"Warning The features of this provider are deprecated in favor of DORYAB provider (see below). 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_BLUETOOTH][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned devices during a time segment, a device can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique devices during a time segment as identified by their hardware ( bt_address ) address countscansmostuniquedevice scans Number of scans of the most sensed device within each time segment instance Assumptions/Observations From v0.2.0 countscans , uniquedevices , countscansmostuniquedevice were deprecated because they overlap with the respective features for ALL devices of the PHONE_BLUETOOTH DORYAB provider","title":"RAPIDS provider"},{"location":"features/phone-bluetooth/#doryab-provider","text":"This provider is adapted from the work by Doryab et al . 
Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_bluetooth_raw.csv - data/raw/ { pid } /phone_bluetooth_with_datetime.csv - data/interim/ { pid } /phone_bluetooth_features/phone_bluetooth_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_bluetooth.csv \" Parameters description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_BLUETOOTH features from the DORYAB provider [FEATURES] Features to be computed, see table below. These features are computed for three device categories: all devices, own devices and other devices. Features description for [PHONE_BLUETOOTH][PROVIDERS][DORYAB] : Feature Units Description countscans scans Number of scans (rows) from the devices sensed during a time segment instance. The more scans a bluetooth device has the longer it remained within range of the participant\u2019s phone uniquedevices devices Number of unique bluetooth devices sensed during a time segment instance as identified by their hardware addresses ( bt_address ) meanscans scans Mean of the scans of every sensed device within each time segment instance stdscans scans Standard deviation of the scans of every sensed device within each time segment instance countscans most frequentdevice within segments scans Number of scans of the most sensed device within each time segment instance countscans least frequentdevice within segments scans Number of scans of the least sensed device within each time segment instance countscans most frequentdevice across segments scans Number of scans of the most sensed device across time segment instances of the same type countscans least frequentdevice across segments scans Number of scans of the least sensed device across time segment instances of the same type per device countscans most frequentdevice acrossdataset scans Number of scans of the most sensed device across the entire 
dataset of every participant countscans least frequentdevice acrossdataset scans Number of scans of the least sensed device across the entire dataset of every participant Assumptions/Observations Devices are classified as belonging to the participant ( own ) or to other people ( others ) using k-means based on the number of times and the number of days each device was detected across each participant\u2019s dataset. See Doryab et al for more details. If ownership cannot be computed because all devices were detected on only one day, they are all considered as other . Thus all and other features will be equal. The likelihood of this scenario decreases the more days of data you have. The most and least frequent devices will be the same across time segment instances and across the entire dataset when every time segment instance covers every hour of a dataset. For example, daily segments (00:00 to 23:59) fall in this category but morning segments (06:00am to 11:59am) or periodic 30-minute segments don\u2019t. Example Simplified raw bluetooth data The following is a simplified example with bluetooth data from three days and two time segments: morning and afternoon. There are two own devices: 55C836F5-487E-405F-8E28-21DBD40FA4FF detected seven times across two days and 499A1EAF-DDF1-4657-986C-EA5032104448 detected eight times on a single day. 
local_date segment bt_address own_device 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-29 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-29 afternoon 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2016-11-30 morning 48872A52-68DE-420D-98DA-73339A1C4685 0 2016-11-30 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2016-11-30 afternoon 55C836F5-487E-405F-8E28-21DBD40FA4FF 1 2017-05-07 morning 5C5A9C41-2F68-4CEB-96D0-77DE3729B729 0 2017-05-07 morning 25262DC7-780C-4AD5-AD3A-D9776AEF7FC1 0 2017-05-07 morning 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 morning 6C444841-FE64-4375-BC3F-FA410CDC0AC7 0 2017-05-07 morning 4DC7A22D-9F1F-4DEF-8576-086910AABCB5 0 2017-05-07 afternoon 5B1E6981-2E50-4D9A-99D8-67AED430C5A8 0 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 2017-05-07 afternoon 499A1EAF-DDF1-4657-986C-EA5032104448 1 The most and least frequent OTHER devices ( own_device == 0 ) during morning segments The most and least frequent ALL | OWN | OTHER devices are computed within each time segment instance, across time segment instances of the same type and across the entire dataset of each person. 
These are the most and least frequent devices for OTHER devices during morning segments. most frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) least frequent device across 2016-11-29 morning: '48872A52-68DE-420D-98DA-73339A1C4685' (this device is the only one in this instance) most frequent device across 2016-11-30 morning: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent device across 2016-11-30 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) least frequent device across 2017-05-07 morning: '25262DC7-780C-4AD5-AD3A-D9776AEF7FC1' (when tied, the first occurrence is chosen) most frequent across morning segments: '5B1E6981-2E50-4D9A-99D8-67AED430C5A8' least frequent across morning segments: '6C444841-FE64-4375-BC3F-FA410CDC0AC7' (when tied, the first occurrence is chosen) most frequent across dataset: '499A1EAF-DDF1-4657-986C-EA5032104448' (only taking into account \"morning\" segments) least frequent across dataset: '4DC7A22D-9F1F-4DEF-8576-086910AABCB5' (when tied, the first occurrence is chosen) Bluetooth features for OTHER devices and morning segments For brevity we only show the following features for morning segments: OTHER : DEVICES : [ \"countscans\" , \"uniquedevices\" , \"meanscans\" , \"stdscans\" ] SCANS_MOST_FREQUENT_DEVICE : [ \"withinsegments\" , \"acrosssegments\" , \"acrossdataset\" ] Note that countscansmostfrequentdeviceacrossdatasetothers is all 0 s because 499A1EAF-DDF1-4657-986C-EA5032104448 is excluded from the count as it is labelled as an own device (not other ). 
local_segment countscansothers uniquedevicesothers meanscansothers stdscansothers countscansmostfrequentdevicewithinsegmentsothers countscansmostfrequentdeviceacrosssegmentsothers countscansmostfrequentdeviceacrossdatasetothers 2016-11-29-morning 1 1 1.000000 NaN 1 0.0 0.0 2016-11-30-morning 4 3 1.333333 0.57735 2 2.0 2.0 2017-05-07-morning 5 5 1.000000 0.00000 1 1.0 1.0","title":"DORYAB provider"},{"location":"features/phone-calls/","text":"Phone Calls \u00b6 Sensor parameters description for [PHONE_CALLS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the calls data is stored RAPIDS Provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_calls_raw.csv - data/raw/ { pid } /phone_calls_with_datetime.csv - data/interim/ { pid } /phone_calls_features/phone_calls_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_calls.csv Parameters description for [PHONE_CALLS][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CALLS features from the RAPIDS provider [CALL_TYPES] The particular call_type that will be analyzed. The options for this parameter are incoming, outgoing or missed. [FEATURES] Features to be computed for outgoing , incoming , and missed calls. Note that the same features are available for both incoming and outgoing calls, while missed calls has its own set of features. See the tables below. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] incoming and outgoing calls: Feature Units Description count calls Number of calls of a particular call_type occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular call_type for a particular time_segment meanduration seconds The mean duration of all calls of a particular call_type during a particular time_segment . 
sumduration seconds The sum of the duration of all calls of a particular call_type during a particular time_segment . minduration seconds The duration of the shortest call of a particular call_type during a particular time_segment . maxduration seconds The duration of the longest call of a particular call_type during a particular time_segment . stdduration seconds The standard deviation of the duration of all the calls of a particular call_type during a particular time_segment . modeduration seconds The mode of the duration of all the calls of a particular call_type during a particular time_segment . entropyduration nats The estimate of the Shannon entropy for the duration of all the calls of a particular call_type during a particular time_segment . timefirstcall minutes The time in minutes between 12:00am (midnight) and the first call of call_type . timelastcall minutes The time in minutes between 12:00am (midnight) and the last call of call_type . countmostfrequentcontact calls The number of calls of a particular call_type during a particular time_segment of the most frequent contact throughout the monitored period. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] missed calls: Feature Units Description count calls Number of missed calls that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with missed calls for a particular time_segment timefirstcall minutes The time in minutes from 12:00am (Midnight) that the first missed call occurred. timelastcall minutes The time in minutes from 12:00am (Midnight) that the last missed call occurred. countmostfrequentcontact calls The number of missed calls during a particular time_segment of the most frequent contact throughout the monitored period. 
Assumptions/Observations Traces for iOS calls are unique even for the same contact calling a participant more than once which renders countmostfrequentcontact meaningless and distinctcontacts equal to the total number of traces. [CALL_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [CALL_TYPES] outgoing matches the [FEATURES] key outgoing iOS calls data is transformed to match Android calls data format. See our algorithm","title":"Phone Calls"},{"location":"features/phone-calls/#phone-calls","text":"Sensor parameters description for [PHONE_CALLS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the calls data is stored","title":"Phone Calls"},{"location":"features/phone-calls/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_calls_raw.csv - data/raw/ { pid } /phone_calls_with_datetime.csv - data/interim/ { pid } /phone_calls_features/phone_calls_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_calls.csv Parameters description for [PHONE_CALLS][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CALLS features from the RAPIDS provider [CALL_TYPES] The particular call_type that will be analyzed. The options for this parameter are incoming, outgoing or missed. [FEATURES] Features to be computed for outgoing , incoming , and missed calls. Note that the same features are available for both incoming and outgoing calls, while missed calls has its own set of features. See the tables below. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] incoming and outgoing calls: Feature Units Description count calls Number of calls of a particular call_type occurred during a particular time_segment . 
distinctcontacts contacts Number of distinct contacts that are associated with a particular call_type for a particular time_segment meanduration seconds The mean duration of all calls of a particular call_type during a particular time_segment . sumduration seconds The sum of the duration of all calls of a particular call_type during a particular time_segment . minduration seconds The duration of the shortest call of a particular call_type during a particular time_segment . maxduration seconds The duration of the longest call of a particular call_type during a particular time_segment . stdduration seconds The standard deviation of the duration of all the calls of a particular call_type during a particular time_segment . modeduration seconds The mode of the duration of all the calls of a particular call_type during a particular time_segment . entropyduration nats The estimate of the Shannon entropy for the duration of all the calls of a particular call_type during a particular time_segment . timefirstcall minutes The time in minutes between 12:00am (midnight) and the first call of call_type . timelastcall minutes The time in minutes between 12:00am (midnight) and the last call of call_type . countmostfrequentcontact calls The number of calls of a particular call_type during a particular time_segment of the most frequent contact throughout the monitored period. Features description for [PHONE_CALLS][PROVIDERS][RAPIDS] missed calls: Feature Units Description count calls Number of missed calls that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with missed calls for a particular time_segment timefirstcall minutes The time in minutes from 12:00am (Midnight) that the first missed call occurred. timelastcall minutes The time in minutes from 12:00am (Midnight) that the last missed call occurred. 
countmostfrequentcontact calls The number of missed calls during a particular time_segment of the most frequent contact throughout the monitored period. Assumptions/Observations Traces for iOS calls are unique even for the same contact calling a participant more than once which renders countmostfrequentcontact meaningless and distinctcontacts equal to the total number of traces. [CALL_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [CALL_TYPES] outgoing matches the [FEATURES] key outgoing iOS calls data is transformed to match Android calls data format. See our algorithm","title":"RAPIDS Provider"},{"location":"features/phone-conversation/","text":"Phone Conversation \u00b6 Sensor parameters description for [PHONE_CONVERSATION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the conversation data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where the conversation data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS) RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_conversation_raw.csv - data/raw/ { pid } /phone_conversation_with_datetime.csv - data/interim/ { pid } /phone_conversation_features/phone_conversation_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_conversation.csv Parameters description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CONVERSATION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [RECORDING_MINUTES] Minutes the plugin was recording audio (default 1 min) [PAUSED_MINUTES] Minutes the plugin was NOT recording audio (default 3 min) Features description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Feature Units Description minutessilence minutes Minutes labeled as silence minutesnoise minutes Minutes labeled as noise minutesvoice minutes Minutes labeled as voice minutesunknown minutes Minutes labeled as unknown sumconversationduration minutes Total duration of all conversations maxconversationduration minutes Longest duration of all conversations minconversationduration minutes Shortest duration of all conversations avgconversationduration minutes Average duration of all conversations sdconversationduration minutes Standard Deviation of the duration of all conversations timefirstconversation minutes Minutes since midnight when the first conversation for a time segment was detected timelastconversation minutes Minutes since midnight when the last conversation for a time segment was detected noisesumenergy L2-norm Sum of all energy values when inference is noise noiseavgenergy L2-norm Average of all energy values when inference is noise noisesdenergy L2-norm Standard Deviation of all energy values 
when inference is noise noiseminenergy L2-norm Minimum of all energy values when inference is noise noisemaxenergy L2-norm Maximum of all energy values when inference is noise voicesumenergy L2-norm Sum of all energy values when inference is voice voiceavgenergy L2-norm Average of all energy values when inference is voice voicesdenergy L2-norm Standard Deviation of all energy values when inference is voice voiceminenergy L2-norm Minimum of all energy values when inference is voice voicemaxenergy L2-norm Maximum of all energy values when inference is voice silencesensedfraction - Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) noisesensedfraction - Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) voicesensedfraction - Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) unknownsensedfraction - Ratio between minutesunknown and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) silenceexpectedfraction - Ratio between minutessilence and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) noiseexpectedfraction - Ratio between minutesnoise and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) voiceexpectedfraction - Ratio between minutesvoice and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) unknownexpectedfraction - Ratio between minutesunknown and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) Assumptions/Observations The timestamp of conversation rows in iOS is in seconds so we 
convert it to milliseconds to match Android\u2019s format","title":"Phone Conversation"},{"location":"features/phone-conversation/#phone-conversation","text":"Sensor parameters description for [PHONE_CONVERSATION] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where the conversation data from Android devices is stored (the AWARE client saves this data on different tables for Android and iOS) [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where the conversation data from iOS devices is stored (the AWARE client saves this data on different tables for Android and iOS)","title":"Phone Conversation"},{"location":"features/phone-conversation/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_conversation_raw.csv - data/raw/ { pid } /phone_conversation_with_datetime.csv - data/interim/ { pid } /phone_conversation_features/phone_conversation_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_conversation.csv Parameters description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_CONVERSATION features from the RAPIDS provider [FEATURES] Features to be computed, see table below [RECORDING_MINUTES] Minutes the plugin was recording audio (default 1 min) [PAUSED_MINUTES] Minutes the plugin was NOT recording audio (default 3 min) Features description for [PHONE_CONVERSATION][PROVIDERS][RAPIDS] : Feature Units Description minutessilence minutes Minutes labeled as silence minutesnoise minutes Minutes labeled as noise minutesvoice minutes Minutes labeled as voice minutesunknown minutes Minutes labeled as unknown sumconversationduration minutes Total duration of all conversations maxconversationduration minutes Longest duration of all conversations minconversationduration minutes Shortest duration of all conversations 
avgconversationduration minutes Average duration of all conversations sdconversationduration minutes Standard Deviation of the duration of all conversations timefirstconversation minutes Minutes since midnight when the first conversation for a time segment was detected timelastconversation minutes Minutes since midnight when the last conversation for a time segment was detected noisesumenergy L2-norm Sum of all energy values when inference is noise noiseavgenergy L2-norm Average of all energy values when inference is noise noisesdenergy L2-norm Standard Deviation of all energy values when inference is noise noiseminenergy L2-norm Minimum of all energy values when inference is noise noisemaxenergy L2-norm Maximum of all energy values when inference is noise voicesumenergy L2-norm Sum of all energy values when inference is voice voiceavgenergy L2-norm Average of all energy values when inference is voice voicesdenergy L2-norm Standard Deviation of all energy values when inference is voice voiceminenergy L2-norm Minimum of all energy values when inference is voice voicemaxenergy L2-norm Maximum of all energy values when inference is voice silencesensedfraction - Ratio between minutessilence and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) noisesensedfraction - Ratio between minutesnoise and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) voicesensedfraction - Ratio between minutesvoice and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) unknownsensedfraction - Ratio between minutesunknown and the sum of (minutessilence, minutesnoise, minutesvoice, minutesunknown) silenceexpectedfraction - Ration between minutessilence and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) noiseexpectedfraction - Ration between minutesnoise and the number of minutes that in theory should have been sensed based on 
the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) voiceexpectedfraction - Ratio between minutesvoice and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) unknownexpectedfraction - Ratio between minutesunknown and the number of minutes that in theory should have been sensed based on the record and pause cycle of the plugin (1440 / recordingMinutes+pausedMinutes) Assumptions/Observations The timestamp of conversation rows in iOS is in seconds so we convert it to milliseconds to match Android\u2019s format","title":"RAPIDS provider"},{"location":"features/phone-data-yield/","text":"Phone Data Yield \u00b6 This is a combinatorial sensor which means that we use the data from multiple sensors to extract data yield features. Data yield features can be used to remove rows ( time segments ) that do not contain enough data. You should decide what is your \u201cenough\u201d threshold depending on the type of sensors you collected (frequency vs event based, e.g. accelerometer vs calls), the length of your study, and the rates of missing data that your analysis could handle. Why is data yield important? Imagine that you want to extract PHONE_CALL features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the phone logged 10 calls and 23 hours of data from other sensors and on day 2 the phone logged 10 calls and only 2 hours of data from other sensors. It\u2019s more likely that other calls were placed on the 22 hours of data that you didn\u2019t log on day 2 than on the 1 hour of data you didn\u2019t log on day 1, and so including day 2 in your analysis could bias your results. Sensor parameters description for [PHONE_DATA_YIELD] : Key Description [SENSORS] One or more phone sensor config keys (e.g. PHONE_MESSAGE ). The more keys you include the more accurately RAPIDS can approximate the time a smartphone was sensing data. 
The supported phone sensors you can include in this list are outlined below ( do NOT include Fitbit sensors, ONLY include phone sensors ). Supported phone sensors for [PHONE_DATA_YIELD][SENSORS] PHONE_ACCELEROMETER PHONE_ACTIVITY_RECOGNITION PHONE_APPLICATIONS_CRASHES PHONE_APPLICATIONS_FOREGROUND PHONE_APPLICATIONS_NOTIFICATIONS PHONE_BATTERY PHONE_BLUETOOTH PHONE_CALLS PHONE_CONVERSATION PHONE_KEYBOARD PHONE_LIGHT PHONE_LOCATIONS PHONE_LOG PHONE_MESSAGES PHONE_SCREEN PHONE_WIFI_CONNECTED PHONE_WIFI_VISIBLE RAPIDS provider \u00b6 Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when any phone sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The timestamps of all sensors are concatenated and then grouped per time segment. Minute and hour windows are created from the beginning of each time segment instance and these windows are marked as valid based on the definitions above. The duration of each time segment is taken into account to compute the features described below. 
Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } / { sensor } _raw.csv # one for every [PHONE_DATA_YIELD][SENSORS] - data/interim/ { pid } /phone_yielded_timestamps.csv - data/interim/ { pid } /phone_yielded_timestamps_with_datetime.csv - data/interim/ { pid } /phone_data_yield_features/phone_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_data_yield.csv Parameters description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. Features description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: (a) the 12 missing hours are from the beginning of the segment or (b) 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). 
However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b ). In other words, sensed data for b is more evenly spread compared to a .","title":"Phone Data Yield"},{"location":"features/phone-data-yield/#phone-data-yield","text":"This is a combinatorial sensor which means that we use the data from multiple sensors to extract data yield features. Data yield features can be used to remove rows ( time segments ) that do not contain enough data. You should decide what is your \u201cenough\u201d threshold depending on the type of sensors you collected (frequency vs event based, e.g. accelerometer vs calls), the length of your study, and the rates of missing data that your analysis could handle. Why is data yield important? Imagine that you want to extract PHONE_CALL features on daily segments ( 00:00 to 23:59 ). Let\u2019s say that on day 1 the phone logged 10 calls and 23 hours of data from other sensors and on day 2 the phone logged 10 calls and only 2 hours of data from other sensors. It\u2019s more likely that other calls were placed on the 22 hours of data that you didn\u2019t log on day 2 than on the 1 hour of data you didn\u2019t log on day 1, and so including day 2 in your analysis could bias your results. Sensor parameters description for [PHONE_DATA_YIELD] : Key Description [SENSORS] One or more phone sensor config keys (e.g. PHONE_MESSAGE ). The more keys you include the more accurately RAPIDS can approximate the time a smartphone was sensing data. The supported phone sensors you can include in this list are outlined below ( do NOT include Fitbit sensors, ONLY include phone sensors ). 
Supported phone sensors for [PHONE_DATA_YIELD][SENSORS] PHONE_ACCELEROMETER PHONE_ACTIVITY_RECOGNITION PHONE_APPLICATIONS_CRASHES PHONE_APPLICATIONS_FOREGROUND PHONE_APPLICATIONS_NOTIFICATIONS PHONE_BATTERY PHONE_BLUETOOTH PHONE_CALLS PHONE_CONVERSATION PHONE_KEYBOARD PHONE_LIGHT PHONE_LOCATIONS PHONE_LOG PHONE_MESSAGES PHONE_SCREEN PHONE_WIFI_CONNECTED PHONE_WIFI_VISIBLE","title":"Phone Data Yield"},{"location":"features/phone-data-yield/#rapids-provider","text":"Before explaining the data yield features, let\u2019s define the following relevant concepts: A valid minute is any 60 second window when any phone sensor logged at least 1 row of data A valid hour is any 60 minute window with at least X valid minutes. The X or threshold is given by [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The timestamps of all sensors are concatenated and then grouped per time segment. Minute and hour windows are created from the beginning of each time segment instance and these windows are marked as valid based on the definitions above. The duration of each time segment is taken into account to compute the features described below. Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } / { sensor } _raw.csv # one for every [PHONE_DATA_YIELD][SENSORS] - data/interim/ { pid } /phone_yielded_timestamps.csv - data/interim/ { pid } /phone_yielded_timestamps_with_datetime.csv - data/interim/ { pid } /phone_data_yield_features/phone_data_yield_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_data_yield.csv Parameters description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_DATA_YIELD features from the RAPIDS provider [FEATURES] Features to be computed, see table below [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] The proportion [0.0 ,1.0] of valid minutes in a 60-minute window necessary to flag that window as valid. 
Features description for [PHONE_DATA_YIELD][PROVIDERS][RAPIDS] : Feature Units Description ratiovalidyieldedminutes - The ratio between the number of valid minutes and the duration in minutes of a time segment. ratiovalidyieldedhours - The ratio between the number of valid hours and the duration in hours of a time segment. If the time segment is shorter than 1 hour this feature will always be 1. Assumptions/Observations We recommend using ratiovalidyieldedminutes on time segments that are shorter than two or three hours and ratiovalidyieldedhours for longer segments. This is because relying on yielded minutes only can be misleading when a big chunk of those missing minutes are clustered together. For example, let\u2019s assume we are working with a 24-hour time segment that is missing 12 hours of data. Two extreme cases can occur: the 12 missing hours are from the beginning of the segment or 30 minutes could be missing from every hour (24 * 30 minutes = 12 hours). ratiovalidyieldedminutes would be 0.5 for both a and b (hinting the missing circumstances are similar). However, ratiovalidyieldedhours would be 0.5 for a and 1.0 for b if [MINUTE_RATIO_THRESHOLD_FOR_VALID_YIELDED_HOURS] is between [0.0 and 0.49] (hinting that the missing circumstances might be more favorable for b . In other words, sensed data for b is more evenly spread compared to a .","title":"RAPIDS provider"},{"location":"features/phone-keyboard/","text":"Phone Keyboard \u00b6 Sensor parameters description for [PHONE_KEYBOARD] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the keyboard data is stored Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_KEYBOARD ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Keyboard"},{"location":"features/phone-keyboard/#phone-keyboard","text":"Sensor parameters description for [PHONE_KEYBOARD] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the keyboard data is stored Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_KEYBOARD ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Keyboard"},{"location":"features/phone-light/","text":"Phone Light \u00b6 Sensor parameters description for [PHONE_LIGHT] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the light data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_light_raw.csv - data/raw/ { pid } /phone_light_with_datetime.csv - data/interim/ { pid } /phone_light_features/phone_light_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_light.csv Parameters description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_LIGHT features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Feature Units Description count rows Number of light sensor rows recorded. maxlux lux The maximum ambient luminance. minlux lux The minimum ambient luminance. avglux lux The average ambient luminance. medianlux lux The median ambient luminance. stdlux lux The standard deviation of ambient luminance. 
Assumptions/Observations NA","title":"Phone Light"},{"location":"features/phone-light/#phone-light","text":"Sensor parameters description for [PHONE_LIGHT] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the light data is stored","title":"Phone Light"},{"location":"features/phone-light/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_light_raw.csv - data/raw/ { pid } /phone_light_with_datetime.csv - data/interim/ { pid } /phone_light_features/phone_light_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_light.csv Parameters description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_LIGHT features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_LIGHT][PROVIDERS][RAPIDS] : Feature Units Description count rows Number light sensor rows recorded. maxlux lux The maximum ambient luminance. minlux lux The minimum ambient luminance. avglux lux The average ambient luminance. medianlux lux The median ambient luminance. stdlux lux The standard deviation of ambient luminance. Assumptions/Observations NA","title":"RAPIDS provider"},{"location":"features/phone-locations/","text":"Phone Locations \u00b6 Sensor parameters description for [PHONE_LOCATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the location data is stored [LOCATIONS_TO_USE] Type of location data to use, one of ALL , GPS , ALL_RESAMPLED or FUSED_RESAMPLED . This filter is based on the provider column of the locations table, ALL includes every row, GPS only includes rows where the provider is gps, ALL_RESAMPLED includes all rows after being resampled, and FUSED_RESAMPLED only includes rows where the provider is fused after being resampled. 
[FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled to the next valid timestamp (see the Assumptions/Observations below) only if the time difference between them is less than or equal to this threshold (in minutes). [FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled at most for this long (in minutes) Assumptions/Observations Types of location data to use Android and iOS clients can collect location coordinates through the phone\u2019s GPS, the network cellular towers around the phone, or Google\u2019s fused location API. If you want to use only the GPS provider, set [LOCATIONS_TO_USE] to GPS If you want to use all providers, set [LOCATIONS_TO_USE] to ALL If you collected location data from different providers, including the fused API, use ALL_RESAMPLED If your mobile client was configured to use fused location only or want to focus only on this provider, set [LOCATIONS_TO_USE] to FUSED_RESAMPLED . ALL_RESAMPLED and FUSED_RESAMPLED take the original location coordinates and replicate each pair forward in time as long as the phone was sensing data as indicated by the joined timestamps of [PHONE_DATA_YIELD][SENSORS] . This is done because Google\u2019s API only logs a new location coordinate pair when it is sufficiently different in time or space from the previous one and because GPS and network providers can log data at variable rates. There are two parameters associated with resampling fused location. FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD (in minutes, default 30) controls the maximum gap between any two coordinate pairs to replicate the last known pair. 
For example, participant A\u2019s phone did not collect data between 10:30 am and 10:50 am and between 11:05am and 11:40am, the last known coordinate pair is replicated during the first period but not the second. In other words, we assume that we can no longer guarantee the participant stayed at the last known location if the phone did not sense data for more than 30 minutes. FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION (in minutes, default 720 or 12 hours) stops the last known fused location from being replicated longer than this threshold even if the phone was sensing data continuously. For example, participant A went home at 9 pm, and their phone was sensing data without gaps until 11 am the next morning, the last known location is replicated until 9 am. If you have suggestions to modify or improve this resampling, let us know. BARNETT provider \u00b6 These features are based on the original open-source implementation by Barnett et al and some features created by Canzian et al . Available time segments and platforms Available only for segments that start at 00:00:00 and end at 23:59:59 of the same or a different day (daily, weekly, weekend, etc.) Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the BARNETT provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this are dropped. 
This number means there\u2019s a 68% probability the actual location is within this radius [IF_MULTIPLE_TIMEZONES] Currently, USE_MOST_COMMON is the only value supported. If the location data for a participant belongs to multiple time zones, we select the most common because Barnett\u2019s algorithm can only handle one time zone [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. Features description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] adapted from Beiwe Summary Statistics : Feature Units Description hometime minutes Time at home. Time spent at home in minutes. Home is the most visited significant location between 8 pm and 8 am, including any pauses within a 200-meter radius. disttravelled meters Total distance traveled over a day (flights). rog meters The Radius of Gyration (rog) is a measure in meters of the area covered by a person over a day. A centroid is calculated for all the places (pauses) visited during a day, and a weighted distance between all the places and that centroid is computed. The weights are proportional to the time spent in each place. maxdiam meters The maximum diameter is the largest distance between any two pauses. maxhomedist meters The maximum distance from home in meters. siglocsvisited locations The number of significant locations visited during the day. Significant locations are computed using k-means clustering over pauses found in the whole monitoring period. The number of clusters is found iterating k from 1 to 200 stopping until the centroids of two significant locations are within 400 meters of one another. avgflightlen meters Mean length of all flights. 
stdflightlen meters Standard deviation of the length of all flights. avgflightdur seconds Mean duration of all flights. stdflightdur seconds The standard deviation of the duration of all flights. probpause - The fraction of a day spent in a pause (as opposed to a flight) siglocentropy nats Shannon\u2019s entropy measurement is based on the proportion of time spent at each significant location visited during a day. circdnrtn - A continuous metric quantifying a person\u2019s circadian routine that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day. wkenddayrtn - Same as circdnrtn but computed separately for weekends and weekdays. Assumptions/Observations Multi day segment features Barnett\u2019s features are only available on time segments that span entire days (00:00:00 to 23:59:59). Such segments can be one-day long (daily) or multi-day (weekly, for example). Multi-day segment features are computed based on daily features summarized the following way: sum for hometime , disttravelled , siglocsvisited , and minutes_data_used max for maxdiam , and maxhomedist mean for rog , avgflightlen , stdflightlen , avgflightdur , stdflightdur , probpause , siglocentropy , circdnrtn , wkenddayrtn , and minsmissing Computation speed The process to extract these features can be slow compared to other sensors and providers due to the required simulation. How are these features computed? These features are based on a Pause-Flight model. A pause is defined as a mobility trace (location pings) within a certain duration and distance (by default, 300 seconds and 60 meters). A flight is any mobility trace between two pauses. Data is resampled and imputed before the features are computed. See Barnett et al for more information. In RAPIDS, we only expose one parameter for these features (accuracy limit). 
You can change other parameters in src/features/phone_locations/barnett/library/MobilityFeatures.R . Significant Locations Significant locations are determined using K-means clustering on pauses longer than 10 minutes. The number of clusters (K) is increased until no two clusters are within 400 meters from each other. After this, pauses within a certain range of a cluster (200 meters by default) count as a visit to that significant location. This description was adapted from the Supplementary Materials of Barnett et al . The Circadian Calculation For a detailed description of how this is calculated, see Canzian et al . DORYAB provider \u00b6 These features are based on the original implementation by Doryab et al. . Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_processed_with_datetime_with_home.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the DORYAB provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this will be dropped. This number means there\u2019s a 68% probability the true location is within this radius [DBSCAN_EPS] The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. 
[DBSCAN_MINSAMPLES] The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself. [THRESHOLD_STATIC] It is the threshold value in km/hr which labels a row as Static or Moving. [MAXIMUM_ROW_GAP] The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. [MAXIMUM_ROW_DURATION] The time difference between any two consecutive rows A and B is considered as the time a participant spent in A . If this difference is bigger than MAXIMUM_ROW_GAP we substitute it with MAXIMUM_ROW_DURATION . [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. [SAMPLING_FREQUENCY] Expected time difference between any two location rows in minutes. If set to 0 , the sampling frequency will be inferred automatically as the median of all the differences between two consecutive row timestamps (recommended if you are using FUSED_RESAMPLED data). This parameter impacts all the time calculations. [CLUSTER_ON] Set this flag to PARTICIPANT_DATASET to create clusters based on the entire participant\u2019s dataset or to TIME_SEGMENT to create clusters based on all the instances of the corresponding time segment (e.g. all mornings). [CLUSTERING_ALGORITHM] The original Doryab et al. implementation uses DBSCAN , OPTICS is also available with similar (but not identical) clustering results and lower memory consumption. 
[RADIUS_FOR_HOME] All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see timeathome feature). Features description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Feature Units Description locationvariance \\(meters^2\\) The sum of the variances of the latitude and longitude columns. loglocationvariance - Log of the sum of the variances of the latitude and longitude columns. totaldistance meters Total distance traveled in a time segment using the haversine formula. averagespeed km/hr Average speed in a time segment considering only the instances labeled as Moving. varspeed km/hr Speed variance in a time segment considering only the instances labeled as Moving. circadianmovement - Not suggested for use now; see Observations below. \u201cIt encodes the extent to which a person\u2019s location patterns follow a 24-hour circadian cycle.\" Doryab et al. . numberofsignificantplaces places Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place. numberlocationtransitions transitions Number of movements between any two clusters in a time segment. radiusgyration meters Quantifies the area covered by a participant timeattop1location minutes Time spent at the most significant location. timeattop2location minutes Time spent at the 2 nd most significant location. timeattop3location minutes Time spent at the 3 rd most significant location. movingtostaticratio - Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine. These times are computed using timeInSeconds feature. 
outlierstimepercent - Ratio between the time spent in non-significant clusters divided by the time spent in all clusters (total location sensed time). A higher value represents more time spent in non-significant clusters. These times are computed using timeInSeconds feature. maxlengthstayatclusters minutes Maximum time spent in a cluster (significant location). minlengthstayatclusters minutes Minimum time spent in a cluster (significant location). meanlengthstayatclusters minutes Average time spent in a cluster (significant location). stdlengthstayatclusters minutes Standard deviation of time spent in a cluster (significant location). locationentropy nats Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). normalizedlocationentropy nats Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). timeathome minutes Time spent at home (see Observations below for a description on how we compute home). Assumptions/Observations Significant Locations Identified Significant locations are determined using DBSCAN clustering on locations that a participant visits over the course of the period of data collection. Circadian Movement Calculation Note Feb 3 2021. It seems the implementation of this feature is not correct; we suggest not to use this feature until a fix is in place. For a detailed description of how this should be calculated, see Saeb et al . Fine-Tuning Clustering Parameters Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that EPS/MAX_EPS =100 produced closer clustering results to reality. 
Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set EPS based on your location data\u2019s accuracy (the more accurate your data is, the lower you should be able to set EPS). Duration Calculation To calculate the time duration component for our features, we compute the difference between consecutive rows\u2019 timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with a maximum duration (60 seconds by default, i.e., we assume a participant spent at least 60 seconds in their last known location) Home location Home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm ( DBSCAN or OPTICS ) and considering the center of the biggest cluster home for that participant.","title":"Phone Locations"},{"location":"features/phone-locations/#phone-locations","text":"Sensor parameters description for [PHONE_LOCATIONS] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the location data is stored [LOCATIONS_TO_USE] Type of location data to use, one of ALL , GPS , ALL_RESAMPLED or FUSED_RESAMPLED . This filter is based on the provider column of the locations table, ALL includes every row, GPS only includes rows where the provider is gps, ALL_RESAMPLED includes all rows after being resampled, and FUSED_RESAMPLED only includes rows where the provider is fused after being resampled. [FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled to the next valid timestamp (see the Assumptions/Observations below) only if the time difference between them is less than or equal to this threshold (in minutes). 
[FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION] if ALL_RESAMPLED or FUSED_RESAMPLED is used, the original fused data has to be resampled, a location row is resampled at most for this long (in minutes) Assumptions/Observations Types of location data to use Android and iOS clients can collect location coordinates through the phone\u2019s GPS, the network cellular towers around the phone, or Google\u2019s fused location API. If you want to use only the GPS provider, set [LOCATIONS_TO_USE] to GPS If you want to use all providers, set [LOCATIONS_TO_USE] to ALL If you collected location data from different providers, including the fused API, use ALL_RESAMPLED If your mobile client was configured to use fused location only or want to focus only on this provider, set [LOCATIONS_TO_USE] to FUSED_RESAMPLED . ALL_RESAMPLED and FUSED_RESAMPLED take the original location coordinates and replicate each pair forward in time as long as the phone was sensing data as indicated by the joined timestamps of [PHONE_DATA_YIELD][SENSORS] . This is done because Google\u2019s API only logs a new location coordinate pair when it is sufficiently different in time or space from the previous one and because GPS and network providers can log data at variable rates. There are two parameters associated with resampling fused location. FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD (in minutes, default 30) controls the maximum gap between any two coordinate pairs to replicate the last known pair. For example, participant A\u2019s phone did not collect data between 10:30 am and 10:50 am and between 11:05am and 11:40am, the last known coordinate pair is replicated during the first period but not the second. In other words, we assume that we can no longer guarantee the participant stayed at the last known location if the phone did not sense data for more than 30 minutes. 
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION (in minutes, default 720 or 12 hours) stops the last known fused location from being replicated longer than this threshold even if the phone was sensing data continuously. For example, participant A went home at 9 pm, and their phone was sensing data without gaps until 11 am the next morning, the last known location is replicated until 9 am. If you have suggestions to modify or improve this resampling, let us know.","title":"Phone Locations"},{"location":"features/phone-locations/#barnett-provider","text":"These features are based on the original open-source implementation by Barnett et al and some features created by Canzian et al . Available time segments and platforms Available only for segments that start at 00:00:00 and end at 23:59:59 of the same or a different day (daily, weekly, weekend, etc.) Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the BARNETT provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this is dropped. This number means there\u2019s a 68% probability the actual location is within this radius [IF_MULTIPLE_TIMEZONES] Currently, USE_MOST_COMMON is the only value supported. 
If the location data for a participant belongs to multiple time zones, we select the most common because Barnett\u2019s algorithm can only handle one time zone [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. Features description for [PHONE_LOCATIONS][PROVIDERS][BARNETT] adapted from Beiwe Summary Statistics : Feature Units Description hometime minutes Time at home. Time spent at home in minutes. Home is the most visited significant location between 8 pm and 8 am, including any pauses within a 200-meter radius. disttravelled meters Total distance traveled over a day (flights). rog meters The Radius of Gyration (rog) is a measure in meters of the area covered by a person over a day. A centroid is calculated for all the places (pauses) visited during a day, and a weighted distance between all the places and that centroid is computed. The weights are proportional to the time spent in each place. maxdiam meters The maximum diameter is the largest distance between any two pauses. maxhomedist meters The maximum distance from home in meters. siglocsvisited locations The number of significant locations visited during the day. Significant locations are computed using k-means clustering over pauses found in the whole monitoring period. The number of clusters is found iterating k from 1 to 200 stopping until the centroids of two significant locations are within 400 meters of one another. avgflightlen meters Mean length of all flights. stdflightlen meters Standard deviation of the length of all flights. avgflightdur seconds Mean duration of all flights. 
stdflightdur seconds The standard deviation of the duration of all flights. probpause - The fraction of a day spent in a pause (as opposed to a flight) siglocentropy nats Shannon\u2019s entropy measurement is based on the proportion of time spent at each significant location visited during a day. circdnrtn - A continuous metric quantifying a person\u2019s circadian routine that can take any value between 0 and 1, where 0 represents a daily routine completely different from any other sensed days and 1 a routine the same as every other sensed day. wkenddayrtn - Same as circdnrtn but computed separately for weekends and weekdays. Assumptions/Observations Multi day segment features Barnett\u2019s features are only available on time segments that span entire days (00:00:00 to 23:59:59). Such segments can be one-day long (daily) or multi-day (weekly, for example). Multi-day segment features are computed based on daily features summarized the following way: sum for hometime , disttravelled , siglocsvisited , and minutes_data_used max for maxdiam , and maxhomedist mean for rog , avgflightlen , stdflightlen , avgflightdur , stdflightdur , probpause , siglocentropy , circdnrtn , wkenddayrtn , and minsmissing Computation speed The process to extract these features can be slow compared to other sensors and providers due to the required simulation. How are these features computed? These features are based on a Pause-Flight model. A pause is defined as a mobility trace (location pings) within a certain duration and distance (by default, 300 seconds and 60 meters). A flight is any mobility trace between two pauses. Data is resampled and imputed before the features are computed. See Barnett et al for more information. In RAPIDS, we only expose one parameter for these features (accuracy limit). You can change other parameters in src/features/phone_locations/barnett/library/MobilityFeatures.R . 
Significant Locations Significant locations are determined using K-means clustering on pauses longer than 10 minutes. The number of clusters (K) is increased until no two clusters are within 400 meters from each other. After this, pauses within a certain range of a cluster (200 meters by default) count as a visit to that significant location. This description was adapted from the Supplementary Materials of Barnett et al . The Circadian Calculation For a detailed description of how this is calculated, see Canzian et al .","title":"BARNETT provider"},{"location":"features/phone-locations/#doryab-provider","text":"These features are based on the original implementation by Doryab et al. . Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_locations_raw.csv - data/interim/ { pid } /phone_locations_processed.csv - data/interim/ { pid } /phone_locations_processed_with_datetime.csv - data/interim/ { pid } /phone_locations_processed_with_datetime_with_home.csv - data/interim/ { pid } /phone_locations_features/phone_locations_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_locations.csv Parameters description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Key Description [COMPUTE] Set to True to extract PHONE_LOCATIONS features from the DORYAB provider [FEATURES] Features to be computed, see table below [ACCURACY_LIMIT] An integer in meters, any location rows with an accuracy higher than this will be dropped. This number means there\u2019s a 68% probability the true location is within this radius [DBSCAN_EPS] The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. 
[DBSCAN_MINSAMPLES] The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself. [THRESHOLD_STATIC] It is the threshold value in km/hr which labels a row as Static or Moving. [MAXIMUM_ROW_GAP] The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. [MAXIMUM_ROW_DURATION] The time difference between any two consecutive rows A and B is considered as the time a participant spent in A . If this difference is bigger than MAXIMUM_ROW_GAP we substitute it with MAXIMUM_ROW_DURATION . [MINUTES_DATA_USED] Set to True to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. [SAMPLING_FREQUENCY] Expected time difference between any two location rows in minutes. If set to 0 , the sampling frequency will be inferred automatically as the median of all the differences between two consecutive row timestamps (recommended if you are using FUSED_RESAMPLED data). This parameter impacts all the time calculations. [CLUSTER_ON] Set this flag to PARTICIPANT_DATASET to create clusters based on the entire participant\u2019s dataset or to TIME_SEGMENT to create clusters based on all the instances of the corresponding time segment (e.g. all mornings). [CLUSTERING_ALGORITHM] The original Doryab et al. implementation uses DBSCAN , OPTICS is also available with similar (but not identical) clustering results and lower memory consumption. 
[RADIUS_FOR_HOME] All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see timeathome feature). Features description for [PHONE_LOCATIONS][PROVIDERS][DORYAB] : Feature Units Description locationvariance \\(meters^2\\) The sum of the variances of the latitude and longitude columns. loglocationvariance - Log of the sum of the variances of the latitude and longitude columns. totaldistance meters Total distance traveled in a time segment using the haversine formula. averagespeed km/hr Average speed in a time segment considering only the instances labeled as Moving. varspeed km/hr Speed variance in a time segment considering only the instances labeled as Moving. circadianmovement - Not suggested for use now; see Observations below. \u201cIt encodes the extent to which a person\u2019s location patterns follow a 24-hour circadian cycle.\" Doryab et al. . numberofsignificantplaces places Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place. numberlocationtransitions transitions Number of movements between any two clusters in a time segment. radiusgyration meters Quantifies the area covered by a participant timeattop1location minutes Time spent at the most significant location. timeattop2location minutes Time spent at the 2 nd most significant location. timeattop3location minutes Time spent at the 3 rd most significant location. movingtostaticratio - Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine. These times are computed using timeInSeconds feature. 
outlierstimepercent - Ratio between the time spent in non-significant clusters divided by the time spent in all clusters (total location sensed time). A higher value represents more time spent in non-significant clusters. These times are computed using timeInSeconds feature. maxlengthstayatclusters minutes Maximum time spent in a cluster (significant location). minlengthstayatclusters minutes Minimum time spent in a cluster (significant location). meanlengthstayatclusters minutes Average time spent in a cluster (significant location). stdlengthstayatclusters minutes Standard deviation of time spent in a cluster (significant location). locationentropy nats Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). normalizedlocationentropy nats Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). timeathome minutes Time spent at home (see Observations below for a description on how we compute home). Assumptions/Observations Significant Locations Identified Significant locations are determined using DBSCAN clustering on locations that a participant visits over the course of the period of data collection. Circadian Movement Calculation Note Feb 3 2021. It seems the implementation of this feature is not correct; we suggest not to use this feature until a fix is in place. For a detailed description of how this should be calculated, see Saeb et al . Fine-Tuning Clustering Parameters Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that EPS/MAX_EPS =100 produced closer clustering results to reality. 
Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set EPS based on your location data\u2019s accuracy (the more accurate your data is, the lower you should be able to set EPS). Duration Calculation To calculate the time duration component for our features, we compute the difference between consecutive rows\u2019 timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with a maximum duration (60 seconds by default, i.e., we assume a participant spent at least 60 seconds in their last known location) Home location Home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm ( DBSCAN or OPTICS ) and considering the center of the biggest cluster home for that participant.","title":"DORYAB provider"},{"location":"features/phone-log/","text":"Phone Log \u00b6 Sensor parameters description for [PHONE_LOG] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where a data log is stored for Android devices [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) where a data log is stored for iOS devices Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_LOG ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Log"},{"location":"features/phone-log/#phone-log","text":"Sensor parameters description for [PHONE_LOG] : Key Description [CONTAINER][ANDROID] Data stream container (database table, CSV file, etc.) where a data log is stored for Android devices [CONTAINER][IOS] Data stream container (database table, CSV file, etc.) 
where a data log is stored for iOS devices Note No feature providers have been implemented for this sensor yet, however you can use its key ( PHONE_LOG ) to improve PHONE_DATA_YIELD or you can implement your own features .","title":"Phone Log"},{"location":"features/phone-messages/","text":"Phone Messages \u00b6 Sensor parameters description for [PHONE_MESSAGES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the messages data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_messages_raw.csv - data/raw/ { pid } /phone_messages_with_datetime.csv - data/interim/ { pid } /phone_messages_features/phone_messages_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_messages.csv Parameters description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_MESSAGES features from the RAPIDS provider [MESSAGES_TYPES] The messages_type that will be analyzed. The options for this parameter are received or sent . [FEATURES] Features to be computed, see table below for [MESSAGES_TYPES] received and sent Features description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Feature Units Description count messages Number of messages of type messages_type that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular messages_type during a particular time_segment . timefirstmessages minutes Number of minutes between 12:00am (midnight) and the first message of a particular messages_type during a particular time_segment . timelastmessages minutes Number of minutes between 12:00am (midnight) and the last message of a particular messages_type during a particular time_segment . 
countmostfrequentcontact messages Number of messages from the contact with the most messages of messages_type during a time_segment throughout the whole dataset of each participant. Assumptions/Observations [MESSAGES_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [MESSAGES_TYPES] sent matches the [FEATURES] key sent","title":"Phone Messages"},{"location":"features/phone-messages/#phone-messages","text":"Sensor parameters description for [PHONE_MESSAGES] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the messages data is stored","title":"Phone Messages"},{"location":"features/phone-messages/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_messages_raw.csv - data/raw/ { pid } /phone_messages_with_datetime.csv - data/interim/ { pid } /phone_messages_features/phone_messages_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_messages.csv Parameters description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_MESSAGES features from the RAPIDS provider [MESSAGES_TYPES] The messages_type that will be analyzed. The options for this parameter are received or sent . [FEATURES] Features to be computed, see table below for [MESSAGES_TYPES] received and sent Features description for [PHONE_MESSAGES][PROVIDERS][RAPIDS] : Feature Units Description count messages Number of messages of type messages_type that occurred during a particular time_segment . distinctcontacts contacts Number of distinct contacts that are associated with a particular messages_type during a particular time_segment . timefirstmessages minutes Number of minutes between 12:00am (midnight) and the first message of a particular messages_type during a particular time_segment . 
timelastmessages minutes Number of minutes between 12:00am (midnight) and the last message of a particular messages_type during a particular time_segment . countmostfrequentcontact messages Number of messages from the contact with the most messages of messages_type during a time_segment throughout the whole dataset of each participant. Assumptions/Observations [MESSAGES_TYPES] and [FEATURES] keys in config.yaml need to match. For example, [MESSAGES_TYPES] sent matches the [FEATURES] key sent","title":"RAPIDS provider"},{"location":"features/phone-screen/","text":"Phone Screen \u00b6 Sensor parameters description for [PHONE_SCREEN] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the screen data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_screen_raw.csv - data/raw/ { pid } /phone_screen_with_datetime.csv - data/interim/ { pid } /phone_screen_episodes.csv - data/interim/ { pid } /phone_screen_episodes_resampled.csv - data/interim/ { pid } /phone_screen_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_screen_features/phone_screen_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_screen.csv Parameters description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_SCREEN features from the RAPIDS provider [FEATURES] Features to be computed, see table below [REFERENCE_HOUR_FIRST_USE] The reference point from which firstuseafter is to be computed, default is midnight [IGNORE_EPISODES_SHORTER_THAN] Ignore episodes that are shorter than this threshold (minutes). Set to 0 to disable this filter. [IGNORE_EPISODES_LONGER_THAN] Ignore episodes that are longer than this threshold (minutes). Set to 0 to disable this filter. 
[EPISODE_TYPES] Currently we only support unlock episodes (from when the phone is unlocked until the screen is off) Features description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Feature Units Description sumduration minutes Total duration of all unlock episodes. maxduration minutes Longest duration of any unlock episode. minduration minutes Shortest duration of any unlock episode. avgduration minutes Average duration of all unlock episodes. stdduration minutes Standard deviation duration of all unlock episodes. countepisode episodes Number of all unlock episodes firstuseafter minutes Minutes until the first unlock episode. Assumptions/Observations In Android, lock events can happen right after an off event, after a few seconds of an off event, or never happen depending on the phone's settings, therefore, an unlock episode is defined as the time between an unlock and an off event. In iOS, on and off events do not exist, so an unlock episode is defined as the time between an unlock and a lock event. Events in iOS are recorded reliably albeit some duplicated lock events within milliseconds from each other, so we only keep consecutive unlock/lock pairs. In Android you can find multiple consecutive unlock or lock events, so we only keep consecutive unlock/off pairs. In our experiments these cases are less than 10% of the screen events collected and this happens because ACTION_SCREEN_OFF and ACTION_SCREEN_ON are sent when the device becomes non-interactive which may have nothing to do with the screen turning off . In addition to unlock/off episodes, in Android it is possible to measure the time spent on the lock screen before an unlock event as well as the total screen time (i.e. ON to OFF ) but these are not implemented at the moment. We transform iOS screen events to match Android\u2019s format, we replace lock episodes with off episodes (2 with 0) in iOS. 
However, as mentioned above this is still computing unlock to lock episodes.","title":"Phone Screen"},{"location":"features/phone-screen/#phone-screen","text":"Sensor parameters description for [PHONE_SCREEN] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) where the screen data is stored","title":"Phone Screen"},{"location":"features/phone-screen/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_screen_raw.csv - data/raw/ { pid } /phone_screen_with_datetime.csv - data/interim/ { pid } /phone_screen_episodes.csv - data/interim/ { pid } /phone_screen_episodes_resampled.csv - data/interim/ { pid } /phone_screen_episodes_resampled_with_datetime.csv - data/interim/ { pid } /phone_screen_features/phone_screen_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_screen.csv Parameters description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_SCREEN features from the RAPIDS provider [FEATURES] Features to be computed, see table below [REFERENCE_HOUR_FIRST_USE] The reference point from which firstuseafter is to be computed, default is midnight [IGNORE_EPISODES_SHORTER_THAN] Ignore episodes that are shorter than this threshold (minutes). Set to 0 to disable this filter. [IGNORE_EPISODES_LONGER_THAN] Ignore episodes that are longer than this threshold (minutes). Set to 0 to disable this filter. [EPISODE_TYPES] Currently we only support unlock episodes (from when the phone is unlocked until the screen is off) Features description for [PHONE_SCREEN][PROVIDERS][RAPIDS] : Feature Units Description sumduration minutes Total duration of all unlock episodes. maxduration minutes Longest duration of any unlock episode. minduration minutes Shortest duration of any unlock episode. avgduration minutes Average duration of all unlock episodes. 
stdduration minutes Standard deviation duration of all unlock episodes. countepisode episodes Number of all unlock episodes firstuseafter minutes Minutes until the first unlock episode. Assumptions/Observations In Android, lock events can happen right after an off event, after a few seconds of an off event, or never happen depending on the phone's settings, therefore, an unlock episode is defined as the time between an unlock and an off event. In iOS, on and off events do not exist, so an unlock episode is defined as the time between an unlock and a lock event. Events in iOS are recorded reliably albeit some duplicated lock events within milliseconds from each other, so we only keep consecutive unlock/lock pairs. In Android you can find multiple consecutive unlock or lock events, so we only keep consecutive unlock/off pairs. In our experiments these cases are less than 10% of the screen events collected and this happens because ACTION_SCREEN_OFF and ACTION_SCREEN_ON are sent when the device becomes non-interactive which may have nothing to do with the screen turning off . In addition to unlock/off episodes, in Android it is possible to measure the time spent on the lock screen before an unlock event as well as the total screen time (i.e. ON to OFF ) but these are not implemented at the moment. We transform iOS screen events to match Android\u2019s format, we replace lock episodes with off episodes (2 with 0) in iOS. However, as mentioned above this is still computing unlock to lock episodes.","title":"RAPIDS provider"},{"location":"features/phone-wifi-connected/","text":"Phone WiFi Connected \u00b6 Sensor parameters description for [PHONE_WIFI_CONNECTED] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (connected) data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_wifi_connected_raw.csv - data/raw/ { pid } /phone_wifi_connected_with_datetime.csv - data/interim/ { pid } /phone_wifi_connected_features/phone_wifi_connected_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_connected.csv Parameters description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_CONNECTED features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points connected during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A connected WiFI access point is one that a phone was connected to. By default AWARE stores this data in the sensor_wifi table.","title":"Phone WiFI Connected"},{"location":"features/phone-wifi-connected/#phone-wifi-connected","text":"Sensor parameters description for [PHONE_WIFI_CONNECTED] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (connected) data is stored","title":"Phone WiFi Connected"},{"location":"features/phone-wifi-connected/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android and iOS File Sequence - data/raw/ { pid } /phone_wifi_connected_raw.csv - data/raw/ { pid } /phone_wifi_connected_with_datetime.csv - data/interim/ { pid } /phone_wifi_connected_features/phone_wifi_connected_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_connected.csv Parameters description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_CONNECTED features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_CONNECTED][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points connected during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A connected WiFI access point is one that a phone was connected to. By default AWARE stores this data in the sensor_wifi table.","title":"RAPIDS provider"},{"location":"features/phone-wifi-visible/","text":"Phone WiFi Visible \u00b6 Sensor parameters description for [PHONE_WIFI_VISIBLE] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (visible) data is stored RAPIDS provider \u00b6 Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_wifi_visible_raw.csv - data/raw/ { pid } /phone_wifi_visible_with_datetime.csv - data/interim/ { pid } /phone_wifi_visible_features/phone_wifi_visible_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_visible.csv Parameters description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_VISIBLE features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points visible during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A visible WiFI access point is one that a phone sensed around itself but that it was not connected to. Due to API restrictions, this sensor is not available on iOS. By default AWARE stores this data in the wifi table.","title":"Phone WiFI Visible"},{"location":"features/phone-wifi-visible/#phone-wifi-visible","text":"Sensor parameters description for [PHONE_WIFI_VISIBLE] : Key Description [CONTAINER] Data stream container (database table, CSV file, etc.) 
where the wifi (visible) data is stored","title":"Phone WiFi Visible"},{"location":"features/phone-wifi-visible/#rapids-provider","text":"Available time segments and platforms Available for all time segments Available for Android only File Sequence - data/raw/ { pid } /phone_wifi_visible_raw.csv - data/raw/ { pid } /phone_wifi_visible_with_datetime.csv - data/interim/ { pid } /phone_wifi_visible_features/phone_wifi_visible_ { language } _ { provider_key } .csv - data/processed/features/ { pid } /phone_wifi_visible.csv Parameters description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Key Description [COMPUTE] Set to True to extract PHONE_WIFI_VISIBLE features from the RAPIDS provider [FEATURES] Features to be computed, see table below Features description for [PHONE_WIFI_VISIBLE][PROVIDERS][RAPIDS] : Feature Units Description countscans devices Number of scanned WiFi access points visible during a time_segment, an access point can be detected multiple times over time and these appearances are counted separately uniquedevices devices Number of unique access point during a time_segment as identified by their hardware address countscansmostuniquedevice scans Number of scans of the most scanned access point during a time_segment across the whole monitoring period Assumptions/Observations A visible WiFI access point is one that a phone sensed around itself but that it was not connected to. Due to API restrictions, this sensor is not available on iOS. 
By default AWARE stores this data in the wifi table.","title":"RAPIDS provider"},{"location":"setup/configuration/","text":"Configuration \u00b6 You need to follow these steps to configure your RAPIDS deployment before you can extract behavioral features Verify RAPIDS can process your data streams Create your participants files Select what time segments you want to extract features on Choose the timezone of your study Configure your data streams Select what sensors and features you want to process When you are done with this configuration, go to executing RAPIDS . Hint Every time you see config[\"KEY\"] or [KEY] in these docs we are referring to the corresponding key in the config.yaml file. Supported data streams \u00b6 A data stream refers to sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Check the table in introduction to data streams to know what data streams we support. If your data stream is supported, continue to the next configuration section, you will use its label later in this guide (e.g. aware_mysql ). If your stream is not supported but you want to implement it, follow the tutorial to add support for new data streams and get in touch by email or in Slack if you have any questions. Participant files \u00b6 Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or automatically . Participant files are stored in data/external/participant_files/pxx.yaml and follow a unified structure . Remember to modify the config.yaml file with your PIDS The list PIDS in config.yaml needs to have the participant file names of the people you want to process. 
For example, if you created p01.yaml , p02.yaml and p03.yaml files in /data/external/participant_files/ , then PIDS should be: PIDS : [ p01 , p02 , p03 ] Optional: Migrating participants files with the old format If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in data/external/participant_files/ python tools/update_format_participant_files.py Structure of participants files \u00b6 Example of the structure of a participant file In this example, the participant used an android phone, an ios phone, a fitbit device, and a Empatica device throughout the study between Apr 23 rd 2020 and Oct 28 th 2020 If your participants didn\u2019t use a [PHONE] , [FITBIT] or [EMPATICA] device, it is not necessary to include that section in their participant file. In other words, you can analyse data from 1 or more devices per participant. PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 , dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ] PLATFORMS : [ android , ios ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 FITBIT : DEVICE_IDS : [ fitbit1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 EMPATICA : DEVICE_IDS : [ empatica1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 [PHONE] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study. [PLATFORMS] An array that specifies the OS of each smartphone in [DEVICE_IDS] , use a combination of android or ios (we support participants that changed platforms in the middle of your study!). You can set [PLATFORMS]: [infer] and RAPIDS will infer them automatically (each phone data stream infer this differently, e.g. aware_mysql uses the aware_device table). [LABEL] A string that is used in reports and visualizations. 
[START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [FITBIT] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study. [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [EMPATICA] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Empatica device used by this participant. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . 
Automatic creation of participant files \u00b6 You can use a CSV file with a row per participant to automatically create participant files. AWARE_DEVICE_TABLE was deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions below: SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; In your config.yaml : Set CSV_FILE_PATH to a CSV file path that complies with the specs described below Set the devices ( PHONE , FITBIT , EMPATICA ) [ADD] flag to TRUE depending on what devices you used in your study. CREATE_PARTICIPANT_FILES : CSV_FILE_PATH : \"your_path/to_your.csv\" PHONE_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] FITBIT_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] EMPATICA_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] Your CSV file ( [CSV_FILE_PATH] ) should have the following columns (headers) but the values within each column can be empty: Column Description device_id Phone device id. Separate multiple ids with ; fitbit_id Fitbit device id. Separate multiple ids with ; empatica_id Empatica device id. 
Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) pid Unique identifiers with the format pXXX (your participant files will be named with this string) platform Use android , ios or infer as explained above, separate values with ; label A human readable string that is used in reports and visualizations. start_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . end_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . Example We added white spaces to this example to make it easy to read but you don\u2019t have to. device_id ,fitbit_id, empatica_id ,pid ,label ,platform ,start_date ,end_date a748ee1a-1d0b-4ae9-9074-279a2b6ba524;dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ,fitbit1 , p01 ,p01 ,julio ,android;ios ,2020-01-01 ,2021-01-01 4c4cf7a1-0340-44bc-be0f-d5053bf7390c ,fitbit2 , p02 ,p02 ,meng ,ios ,2021-01-01 ,2022-01-01 Then run snakemake -j1 create_participants_files Time Segments \u00b6 Time segments (or epochs) are the time windows on which you want to extract behavioral features. For example, you might want to process data on every day, every morning, or only during weekends. RAPIDS offers three categories of time segments that are flexible enough to cover most use cases: frequency (short time windows every day), periodic (arbitrary time windows on any day), and event (arbitrary time windows around events of interest). See also our examples . Frequency Segments These segments are computed on every day and all have the same duration (for example 30 minutes). 
Set the following keys in your config.yaml TIME_SEGMENTS : &time_segments TYPE : FREQUENCY FILE : \"data/external/your_frequency_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can only have 1 row. Column Description label A string that is used as a prefix in the name of your time segments length An integer representing the duration of your time segments in minutes Example label,length thirtyminutes,30 This configuration will compute 48 time segments for every day when any data from any participant was sensed. For example: start_time,length,label 00:00,30,thirtyminutes0000 00:30,30,thirtyminutes0001 01:00,30,thirtyminutes0002 01:30,30,thirtyminutes0003 ... Periodic Segments These segments can be computed every day, or on specific days of the week, month, quarter, and year. Their minimum duration is 1 minute but they can be as long as you want. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : PERIODIC FILE : \"data/external/your_periodic_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE If [INCLUDE_PAST_PERIODIC_SEGMENTS] is set to TRUE , RAPIDS will consider instances of your segments back enough in the past as to include the first row of data of each participant. For example, if the first row of data from a participant happened on Saturday March 7 th 2020 and the requested segment duration is 7 days starting on every Sunday, the first segment to be considered would start on Sunday March 1 st if [INCLUDE_PAST_PERIODIC_SEGMENTS] is TRUE or on Sunday March 8 th if FALSE . The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
It has to be unique between rows start_time A string with format HH:MM:SS representing the starting time of this segment on any day length A string representing the length of this segment.It can have one or more of the following strings XXD XXH XXM XXS to represent days, hours, minutes and seconds. For example 7D 23H 59M 59S repeats_on One of the follow options every_day , wday , qday , mday , and yday . The last four represent a week, quarter, month and year day repeats_value An integer complementing repeats_on . If you set repeats_on to every_day set this to 0 , otherwise 1-7 represent a wday starting from Mondays, 1-31 represent a mday , 1-91 represent a qday , and 1-366 represent a yday Example label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 morning,06:00:00,5H 59M 59S,every_day,0 afternoon,12:00:00,5H 59M 59S,every_day,0 evening,18:00:00,5H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 This configuration will create five segments instances ( daily , morning , afternoon , evening , night ) on any given day ( every_day set to 0). The daily segment will start at midnight and will last 23:59:59 , the other four segments will start at 6am, 12pm, 6pm, and 12am respectively and last for 05:59:59 . Event segments These segments can be computed before or after an event of interest (defined as any UNIX timestamp). Their minimum duration is 1 minute but they can be as long as you want. The start of each segment can be shifted backwards or forwards from the specified timestamp. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : EVENT FILE : \"data/external/your_event_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing auxiliary data for features like the most frequent contact for calls (the most frequent contact will be computed across all these segments). There cannot be two overlapping event segments with the same label (RAPIDS will throw an error) event_timestamp A UNIX timestamp that represents the moment an event of interest happened (clinical relapse, survey, readmission, etc.). The corresponding time segment will be computed around this moment using length , shift , and shift_direction length A string representing the length of this segment. It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes, and seconds. For example 7D 23H 59M 59S shift A string representing the time shift from event_timestamp . It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes and seconds. For example 7D 23H 59M 59S . Use this value to change the start of a segment with respect to its event_timestamp . For example, set this variable to 1H to create a segment that starts 1 hour from an event of interest ( shift_direction determines if it\u2019s before or after). shift_direction An integer representing whether the shift is before ( -1 ) or after ( 1 ) an event_timestamp device_id The device id (smartphone or fitbit) to which this segment belongs. You have to create a line in this event segment file for each event of a participant that you want to analyse. 
If you have participants with multiple device ids you can choose any of them Example label,event_timestamp,length,shift,shift_direction,device_id stress1,1587661220000,1H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress2,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress3,1587906020000,3H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress4,1584291600000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress5,1588172420000,9H,5M,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 This example will create eight segments for a single participant ( a748ee1a... ), five independent stressX segments with various lengths (1,4,3,7, and 9 hours). Segments stress1 , stress3 , and stress5 are shifted forwards by 5 minutes and stress2 and stress4 are shifted backwards by 4 hours (that is, if the stress4 event happened on March 15 th at 1pm EST ( 1584291600000 ), the time segment will start on that day at 9am and end at 4pm). The three mood segments are 1 hour, 1 day and 7 days long and have no shift. In addition, these mood segments are grouped together, meaning that although RAPIDS will compute features on each one of them, some necessary information to compute a few of such features will be extracted from all three segments, for example the phone contact that called a participant the most or the location clusters visited by a participant. Date time labels of event segments In the final feature file, you will find a row per event segment. The local_segment column of each row has a label , a start date-time string, and an end date-time string. weeklysurvey2060#2020-09-12 01 :00:00,2020-09-18 23 :59:59 All sensor data is always segmented based on timestamps, and the date-time strings are attached for informative purposes. For example, you can plot your features based on these strings. 
When you configure RAPIDS to work with a single time zone, such tz code will be used to convert start/end timestamps (the ones you typed in the event segments file) into start/end date-time strings. However, when you configure RAPIDS to work with multiple time zones, RAPIDS will use the most common time zone across all devices of every participant to do the conversion. The most common time zone is the one in which a participant spent the most time. In practical terms, this means that the date-time strings of event segments that happened in uncommon time zones will have shifted start/end date-time labels. However, the data within each segment was correctly filtered based on timestamps. Segment Examples \u00b6 5-minutes Use the following Frequency segment file to create 288 (12 * 24) 5-minute segments starting from midnight of every day in your study label,length fiveminutes,5 Daily Use the following Periodic segment file to create daily segments starting from midnight of every day in your study label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 Morning Use the following Periodic segment file to create morning segments starting at 06:00:00 and ending at 11:59:59 of every day in your study label,start_time,length,repeats_on,repeats_value morning,06:00:00,5H 59M 59S,every_day,0 Overnight Use the following Periodic segment file to create overnight segments starting at 20:00:00 and ending at 07:59:59 (next day) of every day in your study label,start_time,length,repeats_on,repeats_value overnight,20:00:00,11H 59M 59S,every_day,0 Weekly Use the following Periodic segment file to create non-overlapping weekly segments starting at midnight of every Monday in your study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,wday,1 Use the following Periodic segment file to create overlapping weekly segments starting at midnight of every day in your study label,start_time,length,repeats_on,repeats_value 
weekly,00:00:00,6D 23H 59M 59S,every_day,0 Week-ends Use the following Periodic segment file to create week-end segments starting at midnight of every Saturday in your study label,start_time,length,repeats_on,repeats_value weekend,00:00:00,1D 23H 59M 59S,wday,6 Around surveys Use the following Event segment file to create two 2-hour segments that start 1 hour before surveys answered by 3 participants label,event_timestamp,length,shift,shift_direction,device_id survey1,1587661220000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2,1587747620000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey1,1587906020000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey2,1584291600000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey1,1588172420000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 survey2,1584291600000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 Timezone of your study \u00b6 Single timezone \u00b6 If your study only happened in a single time zone or you want to ignore short trips of your participants to different time zones, select the appropriate code from this list and change the following config key. Double-check your timezone code pick, for example, US Eastern Time is America/New_York not EST TIMEZONE : TYPE : SINGLE TZCODE : America/New_York Multiple timezones \u00b6 If your participants lived in different time zones or they traveled across time zones, and you know when participants\u2019 devices were in a specific time zone, RAPIDS can use this data to process your data streams with the correct date-time. You need to provide RAPIDS with the time zone data in a CSV file ( [TZCODES_FILE] ) in the format described below. 
TIMEZONE : TYPE : MULTIPLE SINGLE : TZCODE : America/New_York MULTIPLE : TZCODES_FILE : path_to/time_zones_csv.file IF_MISSING_TZCODE : STOP DEFAULT_TZCODE : America/New_York FITBIT : ALLOW_MULTIPLE_TZ_PER_DEVICE : False INFER_FROM_SMARTPHONE_TZ : False Parameters for [TIMEZONE] Parameter Description [TYPE] Either SINGLE or MULTIPLE as explained above [SINGLE][TZCODE] The time zone code from this list to be used across all devices [MULTIPLE][TZCODES_FILE] A CSV file containing the time zones in which participants\u2019 devices sensed data (see the required format below). Multiple devices can be linked to the same person, read more in Participants Files [MULTIPLE][IF_MISSING_TZCODE] When a device is missing from [TZCODES_FILE] Set this flag to STOP to stop RAPIDS execution and show an error, or to USE_DEFAULT to assign the time zone specified in [DEFAULT_TZCODE] to any such devices [MULTIPLE][FITBIT][ALLOW_MULTIPLE_TZ_PER_DEVICE] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. [MULTIPLE][FITBIT][INFER_FROM_SMARTPHONE_TZ] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. 
Format of TZCODES_FILE TZCODES_FILE has three columns and a row for each time zone a device visited (a device can be a smartphone or wearable (Fitbit/Empatica)): Column Description device_id A string that uniquely identifies a smartphone or wearable tzcode A string with the appropriate code from this list that represents the time zone where the device sensed data timestamp A UNIX timestamp indicating when was the first time this device_id sensed data in tzcode device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/New_York, 1587500000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Mexico_City, 1587600000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 1587700000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587100000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Berlin, 1587200000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587300000000 Using this file, RAPIDS will create time zone intervals per device, for example for 13dbc8a3-dae3-4834-823a-4bc96a7d459d : Interval 1 [1587500000000, 1587599999999] for America/New_York Interval 2 [1587600000000, 1587699999999] for America/Mexico_City Interval 3 [1587700000000, now] for America/Los_Angeles Any sensor data row from a device will be assigned a timezone if it falls within that interval, for example: A screen row sensed at 1587533333333 will be assigned to America/New_York because it falls within Interval 1 A screen row sensed at 1587400000000 will be discarded because it was logged outside any interval. Can I get the TZCODES_FILE from the time zone table collected automatically by the AWARE app? Sure. You can put your timezone table ( timezone.csv ) collected by the AWARE app under data/external folder and run: python tools/create_multi_timezones_file.py The TZCODES_FILE will be saved as data/external/multiple_timezones.csv file. 
What happens if participant X lives in Los Angeles but participant Y lives in Amsterdam and they both stayed there during my study? Add a row per participant and set timestamp to 0 : device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 0 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 0 What happens if I forget to add a timezone for one or more devices? It depends on [IF_MISSING_TZCODE] . If [IF_MISSING_TZCODE] is set to STOP , RAPIDS will stop its execution and show you an error message. If [IF_MISSING_TZCODE] is set to USE_DEFAULT , it will assign the time zone specified in [DEFAULT_TZCODE] to any devices with missing time zone information in [TZCODES_FILE] . This is helpful if only a few of your participants had multiple timezones and you don\u2019t want to specify the same time zone for the rest. How does RAPIDS handle Fitbit devices? Fitbit devices are not time zone aware and they always log data with a local date-time string. When none of the Fitbit devices in your study changed time zones (e.g., p01 was always in New York and p02 was always in Amsterdam), you can set a single time zone per Fitbit device id along with a timestamp 0 (you can still assign multiple time zones to smartphone device ids) device_id, tzcode, timestamp fitbit123, America/New_York, 0 fitbit999, Europe/Amsterdam, 0 On the other hand, when at least one of your Fitbit devices changed time zones AND you want RAPIDS to take into account these changes, you need to set [ALLOW_MULTIPLE_TZ_PER_DEVICE] to True . You have to manually allow this option because you need to be aware it can produce inaccurate features around the times when time zones changed . This is because we cannot know exactly when the Fitbit device detected and processed the time zone change. If you want to ALLOW_MULTIPLE_TZ_PER_DEVICE you will need to add any time zone changes per device in the TZCODES_FILE as explained above. 
You could obtain this data by hand but if your participants also used a smartphone during your study, you can use their time zone logs. Recall that in RAPIDS every participant is represented with a participant file pXX.yaml , this file links together multiple devices and we will use it to know what smartphone time zone data should be applied to Fitbit devices. Thus set INFER_FROM_SMARTPHONE_TZ to TRUE , if you have included smartphone time zone data in your TZCODES_FILE and you want to make a participant\u2019s Fitbit data time zone aware with their respective smartphone data. Data Stream Configuration \u00b6 Modify the following keys in your config.yaml depending on the data stream you want to process. Phone Set [PHONE_DATA_STREAMS][USE] to the smartphone data stream you want to process (e.g. aware_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of streams you are not using (e.g. [FOLDER] of aware_csv ). PHONE_DATA_STREAMS : USE : aware_mysql # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP aware_csv : FOLDER : data/external/aware_csv aware_mysql Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. aware_csv Key Description [FOLDER] Folder where you have to place a CSV file per phone sensor. Each file has to contain all the data from every participant you want to process. Fitbit Set [FITBIT_DATA_STREAMS][TYPE] to the Fitbit data stream you want to process (e.g. fitbitjson_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of the other streams you are not using (e.g. [FOLDER] of aware_csv ). Warning You will probably have to tell RAPIDS the name of the columns where you stored your Fitbit data. To do this, modify your chosen stream\u2019s format.yaml column mappings to match your raw data column names. 
FITBIT_DATA_STREAMS : USE : fitbitjson_mysql # AVAILABLE: fitbitjson_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_mysql This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitjson_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitjson_csv This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a CSV file. Read more about its column mappings and mutations in fitbitjson_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). fitbitparsed_mysql This data stream process Fitbit data stored in multiple columns after being parsed from the JSON column returned by Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitparsed_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitparsed_csv This data stream process Fitbit data stored in multiple columns (plain text) after being parsed from the JSON column returned by Fitbit API and stored in a CSV file. 
Read more about its column mappings and mutations in fitbitparsed_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Empatica Set [USE] to the Empatica data stream you want to use; see the table in introduction to data streams . Configure any parameters as indicated below. EMPATICA_DATA_STREAMS : USE : empatica_zip # AVAILABLE: empatica_zip : FOLDER : data/external/empatica empatica_zip Key Description [FOLDER] The relative path to a folder containing one subfolder per participant. The name of a participant folder should match their device_id assigned in their participant file. Each participant folder can have one or more zip files with any name; in other words, the sensor data in those zip files belong to a single participant. The zip files are automatically generated by Empatica and have a CSV file per sensor ( ACC , HR , TEMP , EDA , BVP , TAGS ). All CSV files of the same type contained in one or more zip files are uncompressed, parsed, sorted by timestamp, and joined together. Example of an EMPATICA FOLDER In the file tree below, we want to process three participants\u2019 data: p01 , p02 , and p03 . p01 has two zip files, p02 has only one zip file, and p03 has three zip files. Each zip has a CSV file per sensor that are joined together and processed by RAPIDS. data/ # this folder exists in the root RAPIDS folder external/ empatica/ p01/ file1.zip file2.zip p02/ aaaa.zip p03/ t1.zip t2.zip t3.zip Sensor and Features to Process \u00b6 Finally, you need to modify the config.yaml section of the sensors you want to extract behavioral features from. 
All sensors follow the same naming nomenclature ( DEVICE_SENSOR ) and parameter structure which we explain in the Behavioral Features Introduction . Done Head over to Execution to learn how to execute RAPIDS.","title":"Configuration"},{"location":"setup/configuration/#configuration","text":"You need to follow these steps to configure your RAPIDS deployment before you can extract behavioral features Verify RAPIDS can process your data streams Create your participants files Select what time segments you want to extract features on Choose the timezone of your study Configure your data streams Select what sensors and features you want to process When you are done with this configuration, go to executing RAPIDS . Hint Every time you see config[\"KEY\"] or [KEY] in these docs we are referring to the corresponding key in the config.yaml file.","title":"Configuration"},{"location":"setup/configuration/#supported-data-streams","text":"A data stream refers to sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, the aware_mysql data stream handles smartphone data ( device ) collected with the AWARE Framework ( format ) stored in a MySQL database ( container ). Check the table in introduction to data streams to know what data streams we support. If your data stream is supported, continue to the next configuration section, you will use its label later in this guide (e.g. aware_mysql ). If your stream is not supported but you want to implement it, follow the tutorial to add support for new data streams and get in touch by email or in Slack if you have any questions.","title":"Supported data streams"},{"location":"setup/configuration/#participant-files","text":"Participant files link together multiple devices (smartphones and wearables) to specific participants and identify them throughout RAPIDS. You can create these files manually or automatically . 
Participant files are stored in data/external/participant_files/pxx.yaml and follow a unified structure . Remember to modify the config.yaml file with your PIDS The list PIDS in config.yaml needs to have the participant file names of the people you want to process. For example, if you created p01.yaml , p02.yaml and p03.yaml files in /data/external/participant_files/ , then PIDS should be: PIDS : [ p01 , p02 , p03 ] Optional: Migrating participants files with the old format If you were using the pre-release version of RAPIDS with participant files in plain text (as opposed to yaml), you can run the following command and your old files will be converted into yaml files stored in data/external/participant_files/ python tools/update_format_participant_files.py","title":"Participant files"},{"location":"setup/configuration/#structure-of-participants-files","text":"Example of the structure of a participant file In this example, the participant used an android phone, an ios phone, a fitbit device, and a Empatica device throughout the study between Apr 23 rd 2020 and Oct 28 th 2020 If your participants didn\u2019t use a [PHONE] , [FITBIT] or [EMPATICA] device, it is not necessary to include that section in their participant file. In other words, you can analyse data from 1 or more devices per participant. PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 , dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ] PLATFORMS : [ android , ios ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 FITBIT : DEVICE_IDS : [ fitbit1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 EMPATICA : DEVICE_IDS : [ empatica1 ] LABEL : test01 START_DATE : 2020-04-23 END_DATE : 2020-10-28 [PHONE] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each smartphone, you can have more than one for when participants changed phones in the middle of the study. 
[PLATFORMS] An array that specifies the OS of each smartphone in [DEVICE_IDS] , use a combination of android or ios (we support participants that changed platforms in the middle of your study!). You can set [PLATFORMS]: [infer] and RAPIDS will infer them automatically (each phone data stream infer this differently, e.g. aware_mysql uses the aware_device table). [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [FITBIT] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Fitbit, you can have more than one in case the participant changed devices in the middle of the study. [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [EMPATICA] Key Description [DEVICE_IDS] An array of the strings that uniquely identify each Empatica device used by this participant. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) [LABEL] A string that is used in reports and visualizations. [START_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . 
Only data collected after this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . [END_DATE] A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . Only data collected before this date time will be included in the analysis. By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 .","title":"Structure of participants files"},{"location":"setup/configuration/#automatic-creation-of-participant-files","text":"You can use a CSV file with a row per participant to automatically create participant files. AWARE_DEVICE_TABLE was deprecated In previous versions of RAPIDS, you could create participant files automatically using the aware_device table. We deprecated this option but you can still achieve the same results if you export the output of the following SQL query as a CSV file and follow the instructions below: SELECT device_id , device_id as fitbit_id , CONCAT ( \"p\" , _id ) as empatica_id , CONCAT ( \"p\" , _id ) as pid , if ( brand = \"iPhone\" , \"ios\" , \"android\" ) as platform , CONCAT ( \"p\" , _id ) as label , DATE_FORMAT ( FROM_UNIXTIME (( timestamp / 1000 ) - 86400 ), \"%Y-%m-%d\" ) as start_date , CURRENT_DATE as end_date from aware_device order by _id ; In your config.yaml : Set CSV_FILE_PATH to a CSV file path that complies with the specs described below Set the devices ( PHONE , FITBIT , EMPATICA ) [ADD] flag to TRUE depending on what devices you used in your study. CREATE_PARTICIPANT_FILES : CSV_FILE_PATH : \"your_path/to_your.csv\" PHONE_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] FITBIT_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] EMPATICA_SECTION : ADD : TRUE # or FALSE IGNORED_DEVICE_IDS : [] Your CSV file ( [CSV_FILE_PATH] ) should have the following columns (headers) but the values within each column can be empty: Column Description device_id Phone device id. Separate multiple ids with ; fitbit_id Fitbit device id. 
Separate multiple ids with ; empatica_id Empatica device id. Since the most common use case involves having multiple zip files from a single device for each person, set this device id to an arbitrary string (we usually use their pid ) pid Unique identifiers with the format pXXX (your participant files will be named with this string) platform Use android , ios or infer as explained above, separate values with ; label A human readable string that is used in reports and visualizations. start_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . end_date A string with format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS . By default, YYYY-MM-DD is interpreted as YYYY-MM-DD 00:00:00 . Example We added white spaces to this example to make it easy to read but you don\u2019t have to. device_id ,fitbit_id, empatica_id ,pid ,label ,platform ,start_date ,end_date a748ee1a-1d0b-4ae9-9074-279a2b6ba524;dsadas-2324-fgsf-sdwr-gdfgs4rfsdf43 ,fitbit1 , p01 ,p01 ,julio ,android;ios ,2020-01-01 ,2021-01-01 4c4cf7a1-0340-44bc-be0f-d5053bf7390c ,fitbit2 , p02 ,p02 ,meng ,ios ,2021-01-01 ,2022-01-01 Then run snakemake -j1 create_participants_files","title":"Automatic creation of participant files"},{"location":"setup/configuration/#time-segments","text":"Time segments (or epochs) are the time windows on which you want to extract behavioral features. For example, you might want to process data on every day, every morning, or only during weekends. RAPIDS offers three categories of time segments that are flexible enough to cover most use cases: frequency (short time windows every day), periodic (arbitrary time windows on any day), and event (arbitrary time windows around events of interest). See also our examples . Frequency Segments These segments are computed on every day and all have the same duration (for example 30 minutes). 
Set the following keys in your config.yaml TIME_SEGMENTS : &time_segments TYPE : FREQUENCY FILE : \"data/external/your_frequency_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can only have 1 row. Column Description label A string that is used as a prefix in the name of your time segments length An integer representing the duration of your time segments in minutes Example label,length thirtyminutes,30 This configuration will compute 48 time segments for every day when any data from any participant was sensed. For example: start_time,length,label 00:00,30,thirtyminutes0000 00:30,30,thirtyminutes0001 01:00,30,thirtyminutes0002 01:30,30,thirtyminutes0003 ... Periodic Segments These segments can be computed every day, or on specific days of the week, month, quarter, and year. Their minimum duration is 1 minute but they can be as long as you want. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : PERIODIC FILE : \"data/external/your_periodic_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE If [INCLUDE_PAST_PERIODIC_SEGMENTS] is set to TRUE , RAPIDS will consider instances of your segments back enough in the past as to include the first row of data of each participant. For example, if the first row of data from a participant happened on Saturday March 7 th 2020 and the requested segment duration is 7 days starting on every Sunday, the first segment to be considered would start on Sunday March 1 st if [INCLUDE_PAST_PERIODIC_SEGMENTS] is TRUE or on Sunday March 8 th if FALSE . The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
It has to be unique between rows start_time A string with format HH:MM:SS representing the starting time of this segment on any day length A string representing the length of this segment. It can have one or more of the following strings XXD XXH XXM XXS to represent days, hours, minutes and seconds. For example 7D 23H 59M 59S repeats_on One of the following options every_day , wday , qday , mday , and yday . The last four represent a week, quarter, month and year day repeats_value An integer complementing repeats_on . If you set repeats_on to every_day set this to 0 , otherwise 1-7 represent a wday starting from Mondays, 1-31 represent a mday , 1-91 represent a qday , and 1-366 represent a yday Example label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 morning,06:00:00,5H 59M 59S,every_day,0 afternoon,12:00:00,5H 59M 59S,every_day,0 evening,18:00:00,5H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 This configuration will create five segment instances ( daily , morning , afternoon , evening , night ) on any given day ( every_day set to 0). The daily segment will start at midnight and will last 23:59:59 , the other four segments will start at 6am, 12pm, 6pm, and 12am respectively and last for 05:59:59 . Event segments These segments can be computed before or after an event of interest (defined as any UNIX timestamp). Their minimum duration is 1 minute but they can be as long as you want. The start of each segment can be shifted backwards or forwards from the specified timestamp. Set the following keys in your config.yaml . TIME_SEGMENTS : &time_segments TYPE : EVENT FILE : \"data/external/your_event_segments.csv\" INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE # or TRUE The file pointed by [TIME_SEGMENTS][FILE] should have the following format and can have multiple rows. Column Description label A string that is used as a prefix in the name of your time segments. 
If labels are unique, every segment is independent; if two or more segments have the same label, their data will be grouped when computing auxiliary data for features like the most frequent contact for calls (the most frequent contact will be computed across all these segments). There cannot be two overlapping event segments with the same label (RAPIDS will throw an error) event_timestamp A UNIX timestamp that represents the moment an event of interest happened (clinical relapse, survey, readmission, etc.). The corresponding time segment will be computed around this moment using length , shift , and shift_direction length A string representing the length of this segment. It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes, and seconds. For example 7D 23H 59M 59S shift A string representing the time shift from event_timestamp . It can have one or more of the following keys XXD XXH XXM XXS to represent a number of days, hours, minutes and seconds. For example 7D 23H 59M 59S . Use this value to change the start of a segment with respect to its event_timestamp . For example, set this variable to 1H to create a segment that starts 1 hour from an event of interest ( shift_direction determines if it\u2019s before or after). shift_direction An integer representing whether the shift is before ( -1 ) or after ( 1 ) an event_timestamp device_id The device id (smartphone or Fitbit) to which this segment belongs. You have to create a line in this event segment file for each event of a participant that you want to analyse. 
If you have participants with multiple device ids you can choose any of them Example label,event_timestamp,length,shift,shift_direction,device_id stress1,1587661220000,1H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress2,1587747620000,4H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress3,1587906020000,3H,5M,1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress4,1584291600000,7H,4H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 stress5,1588172420000,9H,5M,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587661220000,1H,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587747620000,1D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 mood,1587906020000,7D,0,0,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 This example will create eight segments for a single participant ( a748ee1a... ), five independent stressX segments with various lengths (1,4,3,7, and 9 hours). Segments stress1 , stress3 , and stress5 are shifted forwards by 5 minutes and stress2 and stress4 are shifted backwards by 4 hours (that is, if the stress4 event happened on March 15 th at 1pm EST ( 1584291600000 ), the time segment will start on that day at 9am and end at 4pm). The three mood segments are 1 hour, 1 day and 7 days long and have no shift. In addition, these mood segments are grouped together, meaning that although RAPIDS will compute features on each one of them, some necessary information to compute a few of such features will be extracted from all three segments, for example the phone contact that called a participant the most or the location clusters visited by a participant. Date time labels of event segments In the final feature file, you will find a row per event segment. The local_segment column of each row has a label , a start date-time string, and an end date-time string. weeklysurvey2060#2020-09-12 01 :00:00,2020-09-18 23 :59:59 All sensor data is always segmented based on timestamps, and the date-time strings are attached for informative purposes. For example, you can plot your features based on these strings. 
When you configure RAPIDS to work with a single time zone, such tz code will be used to convert start/end timestamps (the ones you typed in the event segments file) into start/end date-time strings. However, when you configure RAPIDS to work with multiple time zones, RAPIDS will use the most common time zone across all devices of every participant to do the conversion. The most common time zone is the one in which a participant spent the most time. In practical terms, this means that the date-time strings of event segments that happened in uncommon time zones will have shifted start/end date-time labels. However, the data within each segment was correctly filtered based on timestamps.","title":"Time Segments"},{"location":"setup/configuration/#segment-examples","text":"5-minutes Use the following Frequency segment file to create 288 (12 * 24) 5-minute segments starting from midnight of every day in your study label,length fiveminutes,5 Daily Use the following Periodic segment file to create daily segments starting from midnight of every day in your study label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 Morning Use the following Periodic segment file to create morning segments starting at 06:00:00 and ending at 11:59:59 of every day in your study label,start_time,length,repeats_on,repeats_value morning,06:00:00,5H 59M 59S,every_day,0 Overnight Use the following Periodic segment file to create overnight segments starting at 20:00:00 and ending at 07:59:59 (next day) of every day in your study label,start_time,length,repeats_on,repeats_value morning,20:00:00,11H 59M 59S,every_day,0 Weekly Use the following Periodic segment file to create non-overlapping weekly segments starting at midnight of every Monday in your study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,wday,1 Use the following Periodic segment file to create overlapping weekly segments starting at midnight of every day in your 
study label,start_time,length,repeats_on,repeats_value weekly,00:00:00,6D 23H 59M 59S,every_day,0 Week-ends Use the following Periodic segment file to create week-end segments starting at midnight of every Saturday in your study label,start_time,length,repeats_on,repeats_value weekend,00:00:00,1D 23H 59M 59S,wday,6 Around surveys Use the following Event segment file to create two 2-hour segments that start 1 hour before surveys answered by 3 participants label,event_timestamp,length,shift,shift_direction,device_id survey1,1587661220000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey2,1587747620000,2H,1H,-1,a748ee1a-1d0b-4ae9-9074-279a2b6ba524 survey1,1587906020000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey2,1584291600000,2H,1H,-1,rqtertsd-43ff-34fr-3eeg-efe4fergregr survey1,1588172420000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3 survey2,1584291600000,2H,1H,-1,klj34oi2-8frk-2343-21kk-324ljklewlr3","title":"Segment Examples"},{"location":"setup/configuration/#timezone-of-your-study","text":"","title":"Timezone of your study"},{"location":"setup/configuration/#single-timezone","text":"If your study only happened in a single time zone or you want to ignore short trips of your participants to different time zones, select the appropriate code from this list and change the following config key. Double-check your timezone code pick, for example, US Eastern Time is America/New_York not EST TIMEZONE : TYPE : SINGLE TZCODE : America/New_York","title":"Single timezone"},{"location":"setup/configuration/#multiple-timezones","text":"If your participants lived in different time zones or they traveled across time zones, and you know when participants\u2019 devices were in a specific time zone, RAPIDS can use this data to process your data streams with the correct date-time. You need to provide RAPIDS with the time zone data in a CSV file ( [TZCODES_FILE] ) in the format described below. 
TIMEZONE : TYPE : MULTIPLE SINGLE : TZCODE : America/New_York MULTIPLE : TZCODES_FILE : path_to/time_zones_csv.file IF_MISSING_TZCODE : STOP DEFAULT_TZCODE : America/New_York FITBIT : ALLOW_MULTIPLE_TZ_PER_DEVICE : False INFER_FROM_SMARTPHONE_TZ : False Parameters for [TIMEZONE] Parameter Description [TYPE] Either SINGLE or MULTIPLE as explained above [SINGLE][TZCODE] The time zone code from this list to be used across all devices [MULTIPLE][TZCODES_FILE] A CSV file containing the time zones in which participants\u2019 devices sensed data (see the required format below). Multiple devices can be linked to the same person, read more in Participants Files [MULTIPLE][IF_MISSING_TZCODE] When a device is missing from [TZCODES_FILE] Set this flag to STOP to stop RAPIDS execution and show an error, or to USE_DEFAULT to assign the time zone specified in [DEFAULT_TZCODE] to any such devices [MULTIPLE][FITBIT][ALLOW_MULTIPLE_TZ_PER_DEVICE] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. [MULTIPLE][FITBIT][INFER_FROM_SMARTPHONE_TZ] You only need to care about this flag if one or more Fitbit devices sensed data in one or more time zones, and you want RAPIDS to take into account this in its feature computation. Read more in \u201cHow does RAPIDS handle Fitbit devices?\u201d below. 
Format of TZCODES_FILE TZCODES_FILE has three columns and a row for each time zone a device visited (a device can be a smartphone or wearable (Fitbit/Empatica)): Column Description device_id A string that uniquely identifies a smartphone or wearable tzcode A string with the appropriate code from this list that represents the time zone where the device sensed data timestamp A UNIX timestamp indicating the first time this device_id sensed data in tzcode device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/New_York, 1587500000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Mexico_City, 1587600000000 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 1587700000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587100000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Berlin, 1587200000000 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 1587300000000 Using this file, RAPIDS will create time zone intervals per device, for example for 13dbc8a3-dae3-4834-823a-4bc96a7d459d : Interval 1 [1587500000000, 1587599999999] for America/New_York Interval 2 [1587600000000, 1587699999999] for America/Mexico_City Interval 3 [1587700000000, now] for America/Los_Angeles Any sensor data row from a device will be assigned a timezone if it falls within that interval, for example: A screen row sensed at 1587533333333 will be assigned to America/New_York because it falls within Interval 1 A screen row sensed at 1587400000000 will be discarded because it was logged outside any interval. Can I get the TZCODES_FILE from the time zone table collected automatically by the AWARE app? Sure. You can put your timezone table ( timezone.csv ) collected by the AWARE app under data/external folder and run: python tools/create_multi_timezones_file.py The TZCODES_FILE will be saved as data/external/multiple_timezones.csv file. 
What happens if participant X lives in Los Angeles but participant Y lives in Amsterdam and they both stayed there during my study? Add a row per participant and set timestamp to 0 : device_id, tzcode, timestamp 13dbc8a3-dae3-4834-823a-4bc96a7d459d, America/Los_Angeles, 0 65sa66a5-2d2d-4524-946v-44ascbv4sad7, Europe/Amsterdam, 0 What happens if I forget to add a timezone for one or more devices? It depends on [IF_MISSING_TZCODE] . If [IF_MISSING_TZCODE] is set to STOP , RAPIDS will stop its execution and show you an error message. If [IF_MISSING_TZCODE] is set to USE_DEFAULT , it will assign the time zone specified in [DEFAULT_TZCODE] to any devices with missing time zone information in [TZCODES_FILE] . This is helpful if only a few of your participants had multiple timezones and you don\u2019t want to specify the same time zone for the rest. How does RAPIDS handle Fitbit devices? Fitbit devices are not time zone aware and they always log data with a local date-time string. When none of the Fitbit devices in your study changed time zones (e.g., p01 was always in New York and p02 was always in Amsterdam), you can set a single time zone per Fitbit device id along with a timestamp 0 (you can still assign multiple time zones to smartphone device ids) device_id, tzcode, timestamp fitbit123, America/New_York, 0 fitbit999, Europe/Amsterdam, 0 On the other hand, when at least one of your Fitbit devices changed time zones AND you want RAPIDS to take into account these changes, you need to set [ALLOW_MULTIPLE_TZ_PER_DEVICE] to True . You have to manually allow this option because you need to be aware it can produce inaccurate features around the times when time zones changed . This is because we cannot know exactly when the Fitbit device detected and processed the time zone change. If you want to ALLOW_MULTIPLE_TZ_PER_DEVICE you will need to add any time zone changes per device in the TZCODES_FILE as explained above. 
You could obtain this data by hand but if your participants also used a smartphone during your study, you can use their time zone logs. Recall that in RAPIDS every participant is represented with a participant file pXX.yaml , this file links together multiple devices and we will use it to know what smartphone time zone data should be applied to Fitbit devices. Thus set INFER_FROM_SMARTPHONE_TZ to TRUE , if you have included smartphone time zone data in your TZCODES_FILE and you want to make a participant\u2019s Fitbit data time zone aware with their respective smartphone data.","title":"Multiple timezones"},{"location":"setup/configuration/#data-stream-configuration","text":"Modify the following keys in your config.yaml depending on the data stream you want to process. Phone Set [PHONE_DATA_STREAMS][USE] to the smartphone data stream you want to process (e.g. aware_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of streams you are not using (e.g. [FOLDER] of aware_csv ). PHONE_DATA_STREAMS : USE : aware_mysql # AVAILABLE: aware_mysql : DATABASE_GROUP : MY_GROUP aware_csv : FOLDER : data/external/aware_csv aware_mysql Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. aware_csv Key Description [FOLDER] Folder where you have to place a CSV file per phone sensor. Each file has to contain all the data from every participant you want to process. Fitbit Set [FITBIT_DATA_STREAMS][USE] to the Fitbit data stream you want to process (e.g. fitbitjson_mysql ) and configure its parameters (e.g. [DATABASE_GROUP] ). Ignore the parameters of the other streams you are not using (e.g. [FOLDER] of fitbitjson_csv ). Warning You will probably have to tell RAPIDS the name of the columns where you stored your Fitbit data. To do this, modify your chosen stream\u2019s format.yaml column mappings to match your raw data column names. 
FITBIT_DATA_STREAMS : USE : fitbitjson_mysql # AVAILABLE: fitbitjson_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_mysql : DATABASE_GROUP : MY_GROUP SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitparsed_csv : FOLDER : data/external/fitbit_csv SLEEP_SUMMARY_EPISODE_DAY_ANCHOR : False fitbitjson_mysql This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitjson_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. 
If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitjson_csv This data stream process Fitbit data inside a JSON column as obtained from the Fitbit API and stored in a CSV file. Read more about its column mappings and mutations in fitbitjson_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). fitbitparsed_mysql This data stream process Fitbit data stored in multiple columns after being parsed from the JSON column returned by Fitbit API and stored in a MySQL database. Read more about its column mappings and mutations in fitbitparsed_mysql . Key Description [DATABASE_GROUP] A database credentials group. Read the instructions below to set it up [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . 
Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine. fitbitparsed_csv This data stream process Fitbit data stored in multiple columns (plain text) after being parsed from the JSON column returned by Fitbit API and stored in a CSV file. 
Read more about its column mappings and mutations in fitbitparsed_csv . Key Description [FOLDER] Folder where you have to place a CSV file per Fitbit sensor. Each file has to contain all the data from every participant you want to process. [SLEEP_SUMMARY_EPISODE_DAY_ANCHOR] One of start or end . Summary sleep episodes are considered as events based on either the start timestamp or end timestamp (they will belong to the day where they start or end). Empatica Set [USE] to the Empatica data stream you want to use; see the table in introduction to data streams . Configure any parameters as indicated below. EMPATICA_DATA_STREAMS : USE : empatica_zip # AVAILABLE: empatica_zip : FOLDER : data/external/empatica empatica_zip Key Description [FOLDER] The relative path to a folder containing one subfolder per participant. The name of a participant folder should match their device_id assigned in their participant file. Each participant folder can have one or more zip files with any name; in other words, the sensor data in those zip files belong to a single participant. The zip files are automatically generated by Empatica and have a CSV file per sensor ( ACC , HR , TEMP , EDA , BVP , TAGS ). All CSV files of the same type contained in one or more zip files are uncompressed, parsed, sorted by timestamp, and joined together. Example of an EMPATICA FOLDER In the file tree below, we want to process three participants\u2019 data: p01 , p02 , and p03 . p01 has two zip files, p02 has only one zip file, and p03 has three zip files. Each zip has a CSV file per sensor that are joined together and processed by RAPIDS. data/ # this folder exists in the root RAPIDS folder external/ empatica/ p01/ file1.zip file2.zip p02/ aaaa.zip p03/ t1.zip t2.zip t3.zip","title":"Data Stream Configuration"},{"location":"setup/configuration/#sensor-and-features-to-process","text":"Finally, you need to modify the config.yaml section of the sensors you want to extract behavioral features from. 
All sensors follow the same naming nomenclature ( DEVICE_SENSOR ) and parameter structure which we explain in the Behavioral Features Introduction . Done Head over to Execution to learn how to execute RAPIDS.","title":"Sensor and Features to Process"},{"location":"setup/execution/","text":"Execution \u00b6 After you have installed and configured RAPIDS, use the following command to execute it. ./rapids -j1 Ready to extract behavioral features If you are ready to extract features head over to the Behavioral Features Introduction We wrap Snakemake The script ./rapids is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. -j1 ). Updating RAPIDS output after modifying config.yaml Any changes to the config.yaml file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for PHONE_MESSAGE for example, RAPIDS will execute the script that computes MESSAGES features and update its output file. Multi-core You can run RAPIDS over multiple cores by modifying the -j argument (e.g. use -j8 to use 8 cores). However , take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again. As reference, we have run RAPIDS over 12 cores and 32 Gb of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer). 
Deleting RAPIDS output If you want to delete all the output files RAPIDS produces, you can execute the following command: ./rapids -j1 --delete-all-output Forcing a complete rerun or updating your raw data in RAPIDS If you want to update your raw data or rerun the whole pipeline from scratch, run the following commands: ./rapids -j1 --delete-all-output ./rapids -j1","title":"Execution"},{"location":"setup/execution/#execution","text":"After you have installed and configured RAPIDS, use the following command to execute it. ./rapids -j1 Ready to extract behavioral features If you are ready to extract features head over to the Behavioral Features Introduction We wrap Snakemake The script ./rapids is a wrapper around Snakemake so you can pass any parameters that Snakemake accepts (e.g. -j1 ). Updating RAPIDS output after modifying config.yaml Any changes to the config.yaml file will be applied automatically and only the relevant files will be updated. This means that after modifying the features list for PHONE_MESSAGE for example, RAPIDS will execute the script that computes MESSAGES features and update its output file. Multi-core You can run RAPIDS over multiple cores by modifying the -j argument (e.g. use -j8 to use 8 cores). However , take into account that this means multiple sensor datasets for different participants will be loaded in memory at the same time. If RAPIDS crashes because it ran out of memory, reduce the number of cores and try again. As reference, we have run RAPIDS over 12 cores and 32 Gb of RAM without problems for a study with 200 participants with 14 days of low-frequency smartphone data (no accelerometer, gyroscope, or magnetometer). 
Deleting RAPIDS output If you want to delete all the output files RAPIDS produces, you can execute the following command: ./rapids -j1 --delete-all-output Forcing a complete rerun or updating your raw data in RAPIDS If you want to update your raw data or rerun the whole pipeline from scratch, run the following commands: ./rapids -j1 --delete-all-output ./rapids -j1","title":"Execution"},{"location":"setup/installation/","text":"Installation \u00b6 You can install RAPIDS using Docker (the fastest), or native instructions for MacOS and Linux (Ubuntu). Windows is supported through Docker or WSL. Docker Install Docker Pull our RAPIDS container docker pull moshiresearch/rapids:latest Run RAPIDS' container (after this step is done you should see a prompt in the main RAPIDS folder with its python environment active) docker run -it moshiresearch/rapids:latest Pull the latest version of RAPIDS git pull Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Optional . You can edit RAPIDS files with vim but we recommend using Visual Studio Code and its Remote Containers extension How to configure Remote Containers extension Make sure RAPIDS container is running Install the Remote - Containers extension Go to the Remote Explorer panel on the left hand sidebar On the top right dropdown menu choose Containers Double click on the moshiresearch/rapids container in the CONTAINERS tree A new VS Code session should open on RAPIDS main folder inside the container. Warning If you installed RAPIDS using Docker for Windows on Windows 10, the container will have limits on the amount of RAM it can use. If you find that RAPIDS crashes due to running out of memory, increase this limit. MacOS We tested these instructions in Catalina and Big Sur M1 Macs RAPIDS can run on M1 Macs, the only changes as of Feb 21, 2021 are: R needs to be installed via brew under Rosetta (x86 arch) due to incompatibility issues with some R libraries. 
To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. Use x86 brew to install R and restore RAPIDS\u2019 packages ( snakemake -j1 renv_install & snakemake -j1 renv_restore ). There is a bug related to timezone codes. We set the correct TZ_DIR in renv/activate.R (line #19) Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) (RAPIDS does this automatically). Install brew Install MySQL brew install mysql brew services start mysql Install R 4.0, pandoc and rmarkdown. If you have other instances of R, we recommend uninstalling them brew install r brew install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install miniconda (restart your terminal afterwards) brew cask install miniconda conda init zsh # (or conda init bash) Clone our repo git clone https://github.com/carissalow/rapids Create a python virtual environment cd rapids conda env create -f environment.yml -n rapids conda activate rapids Install R packages and virtual environment: snakemake -j1 renv_install snakemake -j1 renv_restore Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Ubuntu We tested RAPIDS on Ubuntu 18.04 & 20.04. Note that the necessary Python and R packages are available in other Linux distributions, so if you decide to give it a try, let us know and we can update these docs. Install dependencies sudo apt install libcurl4-openssl-dev sudo apt install libssl-dev sudo apt install libxml2-dev sudo apt install libglpk40 Install MySQL sudo apt install libmysqlclient-dev sudo apt install mysql-server Add key for R\u2019s repository. 
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 Add R\u2019s repository Ubuntu 18.04 Bionic sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' Ubuntu 20.04 Focal sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' Install R 4.0. If you have other instances of R, we recommend uninstalling them sudo apt update sudo apt install r-base Install Pandoc and rmarkdown sudo apt install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install git sudo apt install git Install miniconda Restart your current shell Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install the R virtual environment management package (renv) snakemake -j1 renv_install Restore the R virtual environment Ubuntu 18.04 Bionic (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/bionic/latest\"))' Ubuntu 20.04 Focal (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/focal/latest\"))' Ubuntu (slow) If the fast installation command failed for some reason, you can restore the R virtual environment from source: R -e 'renv::restore()' Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. 
Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Windows There are several options varying in complexity: You can use our Docker instructions (tested) You can use our Ubuntu 20.04 instructions on WSL2 (not tested but it will likely work) Native installation (experimental). If you would like to contribute to RAPIDS you could try to install MySQL, miniconda, Python, and R 4.0+ in Windows and restore the Python and R virtual environments using steps 6 and 7 of the instructions for Mac. You can get in touch if you would like to discuss this with the team.","title":"Installation"},{"location":"setup/installation/#installation","text":"You can install RAPIDS using Docker (the fastest), or native instructions for MacOS and Linux (Ubuntu). Windows is supported through Docker or WSL. Docker Install Docker Pull our RAPIDS container docker pull moshiresearch/rapids:latest Run RAPIDS' container (after this step is done you should see a prompt in the main RAPIDS folder with its python environment active) docker run -it moshiresearch/rapids:latest Pull the latest version of RAPIDS git pull Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Optional . You can edit RAPIDS files with vim but we recommend using Visual Studio Code and its Remote Containers extension How to configure Remote Containers extension Make sure RAPIDS container is running Install the Remote - Containers extension Go to the Remote Explorer panel on the left hand sidebar On the top right dropdown menu choose Containers Double click on the moshiresearch/rapids container in the CONTAINERS tree A new VS Code session should open on RAPIDS main folder inside the container. Warning If you installed RAPIDS using Docker for Windows on Windows 10, the container will have limits on the amount of RAM it can use. If you find that RAPIDS crashes due to running out of memory, increase this limit. 
MacOS We tested these instructions in Catalina and Big Sur M1 Macs RAPIDS can run on M1 Macs, the only changes as of Feb 21, 2021 are: R needs to be installed via brew under Rosetta (x86 arch) due to incompatibility issues with some R libraries. To do this, run your terminal via Rosetta , then proceed with the usual brew installation command. Use x86 brew to install R and restore RAPIDS\u2019 packages ( snakemake -j1 renv_install & snakemake -j1 renv_restore ). There is a bug related to timezone codes. We set the correct TZ_DIR in renv/activate.R (line #19) Sys.setenv(\"TZDIR\" = file.path(R.home(), \"share\", \"zoneinfo\")) (RAPIDS does this automatically). Install brew Install MySQL brew install mysql brew services start mysql Install R 4.0, pandoc and rmarkdown. If you have other instances of R, we recommend uninstalling them brew install r brew install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install miniconda (restart your terminal afterwards) brew cask install miniconda conda init zsh # (or conda init bash) Clone our repo git clone https://github.com/carissalow/rapids Create a python virtual environment cd rapids conda env create -f environment.yml -n rapids conda activate rapids Install R packages and virtual environment: snakemake -j1 renv_install snakemake -j1 renv_restore Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Ubuntu We tested RAPIDS on Ubuntu 18.04 & 20.04. Note that the necessary Python and R packages are available in other Linux distributions, so if you decide to give it a try, let us know and we can update these docs. 
Install dependencies sudo apt install libcurl4-openssl-dev sudo apt install libssl-dev sudo apt install libxml2-dev sudo apt install libglpk40 Install MySQL sudo apt install libmysqlclient-dev sudo apt install mysql-server Add key for R\u2019s repository. sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 Add R\u2019s repository Ubuntu 18.04 Bionic sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' Ubuntu 20.04 Focal sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' Install R 4.0. If you have other instances of R, we recommend uninstalling them sudo apt update sudo apt install r-base Install Pandoc and rmarkdown sudo apt install pandoc Rscript --vanilla -e 'install.packages(\"rmarkdown\", repos=\"http://cran.us.r-project.org\")' Install git sudo apt install git Install miniconda Restart your current shell Clone our repo: git clone https://github.com/carissalow/rapids Create a python virtual environment: cd rapids conda env create -f environment.yml -n MY_ENV_NAME conda activate MY_ENV_NAME Install the R virtual environment management package (renv) snakemake -j1 renv_install Restore the R virtual environment Ubuntu 18.04 Bionic (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/bionic/latest\"))' Ubuntu 20.04 Focal (fast) Run the following command to restore the R virtual environment using RSPM binaries R -e 'renv::restore(repos = c(CRAN = \"https://packagemanager.rstudio.com/all/__linux__/focal/latest\"))' Ubuntu (slow) If the fast installation command failed for some reason, you can restore the R virtual environment from source: R -e 'renv::restore()' Note This step could take several minutes to complete, especially if you have less than 3Gb of RAM or packages need to be compiled from source. 
Please be patient and let it run until completion. Make RAPIDS script executable chmod +x rapids Check that RAPIDS is working ./rapids -j1 Windows There are several options varying in complexity: You can use our Docker instructions (tested) You can use our Ubuntu 20.04 instructions on WSL2 (not tested but it will likely work) Native installation (experimental). If you would like to contribute to RAPIDS you could try to install MySQL, miniconda, Python, and R 4.0+ in Windows and restore the Python and R virtual environments using steps 6 and 7 of the instructions for Mac. You can get in touch if you would like to discuss this with the team.","title":"Installation"},{"location":"setup/overview/","text":"Overview \u00b6 Let\u2019s review some key concepts we use throughout these docs: Definition Description Data Stream Set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, smartphone (device) data collected with the AWARE Framework (format) and stored in a MySQL database (container). Device A mobile or wearable device, like smartphones, Fitbit wrist bands, Oura Rings, etc. Sensor A physical or digital module builtin in a device that produces a data stream. For example, a smartphone\u2019s accelerometer or screen. Format A file in RAPIDS that describes how sensor data from a device matches RAPIDS data representation. Container An electronic repository of data, it can be a database, a file, a Web API, etc. RAPIDS connects to containers through container scripts. Participant A person that took part in a monitoring study Behavioral feature A metric computed from raw sensor data quantifying the behavior of a participant. For example, time spent at home computed from location data. These are also known as digital biomarkers Time segment Time segments (or epochs) are the time windows on which RAPIDS extracts behavioral features. 
For example, you might want to compute participants\u2019 time at home every morning or only during weekends. You define time segments in a CSV file that RAPIDS processes. Time zone A string code like America/New_York that represents a time zone where a device logged data. You can process data collected in single or multiple time zones. Provider A script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . config.yaml A YAML file where you can modify parameters to process data streams and behavioral features. This is the heart of RAPIDS and the file that you will modify the most. credentials.yaml A YAML file where you can define credential groups (user, password, host, etc.) if your data stream needs to connect to a database or Web API Participant file(s) A YAML file that links one or more smartphone or wearable devices that a single participant used. RAPIDS needs one file per participant. What can I do with RAPIDS? You can do one or more of these things with RAPIDS: Extract behavioral features from smartphone, Fitbit, and Empatica\u2019s supported data streams Add your own behavioral features (we can include them in RAPIDS if you want to share them with the community) Add support for new data streams if yours cannot be processed by RAPIDS yet Create visualizations for data quality control and feature inspection Extending RAPIDS to organize your analysis and publish a code repository along with your code Hint In order to follow any of the previous tutorials, you will have to Install , Configure , and learn how to Execute RAPIDS. We recommend you follow the Minimal Example tutorial to get familiar with RAPIDS Email us , leave a comment in these docs, create a Github issue or text us in Slack if you have any questions Frequently Asked Questions \u00b6 General \u00b6 What exactly is RAPIDS? 
RAPIDS is a group of configuration files and R and Python scripts that are executed by Snakemake . You can get a copy of RAPIDS by cloning our Github repository. RAPIDS is not a web application or server; all the processing is done in your laptop, server, or computer cluster. How does RAPIDS work? You will most of the time only have to modify configuration files in YAML format ( config.yaml , credentials.yaml , and participant files pxx.yaml ), and in CSV format (time zones and time segments). RAPIDS pulls data from different data containers and processes it in steps. The input/output of each step is saved as a CSV file for inspection; you can check the files that are created for each sensor on its documentation page. All data is stored in data/ , and all processing Python and R scripts are stored in src/ . User and File interactions in RAPIDS In the figure below, we represent the interactions between users and files. After a user modifies the configuration files mentioned above, the Snakefile file will search for and execute the Snakemake rules that contain the Python or R scripts necessary to generate or update the required output files (behavioral features, plots, etc.). Interaction diagram between the user, and important files in RAPIDS Data flow in RAPIDS In the figure below, we represent the flow of data in RAPIDS. In broad terms, smartphone and wearable devices log data streams with a certain format to a data container (database, file, etc.). RAPIDS can connect to these containers if it has a format.yaml and a container.[R|py] script used to pull the correct data and mutate it to comply with RAPIDS\u2019 internal data representation. Once the data stream is in RAPIDS, it goes through some basic transformations (scripts), one that assigns a time segment and a time zone to each data row, and another one that creates \u201cepisodes\u201d of data for some sensors that need it (like screen, battery, activity recognition, and sleep intraday data). 
After this, RAPIDS executes the requested PROVIDER script that computes behavioral features per time segment instance. After every feature is computed, they are joined per sensor, per participant, and study. Visualizations are built based on raw data or based on computed features. Data stream flow in RAPIDS Is my data private? Absolutely, you are processing your data with your own copy of RAPIDS in your laptop, server, or computer cluster, so neither we nor anyone else can have access to your datasets. Do I need to have coding skills to use RAPIDS? If you want to extract the behavioral features or visualizations that RAPIDS offers out of the box, the answer is no. However, you need to be comfortable running commands in your terminal and familiar with editing YAML files and CSV files. If you want to add support for new data streams or behavioral features, you need to be familiar with R or Python. Is RAPIDS open-source or free? Yes, RAPIDS is both open-source and free. How do I cite RAPIDS? Please refer to our Citation guide ; depending on what parts of RAPIDS you used, we also ask you to cite the work of other authors that shared their work. I have a lot of data, can RAPIDS handle it/ is RAPIDS fast enough? Yes, we use Snakemake under the hood, so you can automatically distribute RAPIDS execution over multiple cores or clusters . RAPIDS processes data per sensor and participant, so it can take advantage of this parallel processing. What are the advantages of using RAPIDS over implementing my own analysis code? We believe RAPIDS can benefit your analysis in several ways: RAPIDS has more than 250 behavioral features available, many of them tested and used by other researchers. RAPIDS can extract features in dynamic time segments (for example, every x minutes, x hours, x days, x weeks, x months, etc.). This is handy because you don\u2019t have to deal with time zones, daylight saving changes, or date arithmetic. Your analysis is less prone to errors. 
Every participant sensor dataset is analyzed in the same way and isolated from each other. If you have lots of data, out-of-the-box parallel execution will speed up your analysis and if your computer crashes, RAPIDS will start from where it left off. You can publish your analysis code along with your papers and be sure it will run exactly as it does in your computer. You can still add your own behavioral features and data streams if you need to, and the community will be able to reuse your work. Data Streams \u00b6 Can I process smartphone data collected with Beiwe, PurpleRobot, or app X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. Can I process data from Oura Rings, Actigraphs, or wearable X? The only wearables we support at the moment are Empatica and Fitbit. However, get in touch if you need to process data from a different wearable. We have limited resources so we add support for different devices on an as-needed basis, but we would be happy to collaborate with you to add new wearables. Email us , create a Github issue or text us in Slack if you have any questions. Can I process smartphone or wearable data stored in PostgreSQL, Oracle, SQLite, CSV files, or data container X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . If you are processing data streams we already support like AWARE, Fitbit, or Empatica and are just connecting to a different container; you can reuse their format.yaml and only implement a new container script. Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. 
I have participants that live in different time zones and some that travel; can RAPIDS handle this? Yes, RAPIDS can handle single or multiple timezones per participant. You can use time zone data collected by smartphones or collected by hand. Some of my participants used more than one device during my study; can RAPIDS handle this? Yes, you can link more than one smartphone or wearable device to a single participant. RAPIDS will merge them and sort them automatically. Some of my participants switched from Android to iOS or vice-versa during my study; can RAPIDS handle this? Yes, data from multiple smartphones can be linked to a single participant. All iOS data is converted to Android data before merging it. Extending RAPIDS \u00b6 Can I add my own behavioral features/digital biomarkers? Yes, you can implement your own features in R or Python following this tutorial Can I extract behavioral features based on two or more sensors? Yes, we do this for PHONE_DATA_YIELD (combines all phone sensors), PHONE_LOCATIONS (combines location and data yield data), PHONE_APPLICATIONS_BACKGROUND (combines screen and app usage data), and FITBIT_INTRADAY_STEPS (combines Fitbit and sleep and step data). However, we haven\u2019t come up with a user-friendly way to configure this, and currently, we join sensors on a case-by-case basis. This is mainly because not enough users have needed this functionality so far. Get in touch, and we can set it up together; the more use cases we are aware of, the easier it will be to integrate this into RAPIDS. I know how to program in Python or R but not both. Can I still use or extend RAPIDS? Yes, you don\u2019t need to write any code to use RAPIDS out of the box. If you need to add support for new data streams or behavioral features you can use scripts in either language. I have scripts that clean raw data from X sensor, can I use them with RAPIDS? Yes, you can add them as a [MUTATION][SCRIPT] in the format.yaml of the data stream you are using. 
You will add a main function that will receive a data frame with the raw data for that sensor that in turn will be used to compute behavioral features.","title":"Overview"},{"location":"setup/overview/#overview","text":"Let\u2019s review some key concepts we use throughout these docs: Definition Description Data Stream Set of sensor data collected using a specific type of device with a specific format and stored in a specific container . For example, smartphone (device) data collected with the AWARE Framework (format) and stored in a MySQL database (container). Device A mobile or wearable device, like smartphones, Fitbit wrist bands, Oura Rings, etc. Sensor A physical or digital module builtin in a device that produces a data stream. For example, a smartphone\u2019s accelerometer or screen. Format A file in RAPIDS that describes how sensor data from a device matches RAPIDS data representation. Container An electronic repository of data, it can be a database, a file, a Web API, etc. RAPIDS connects to containers through container scripts. Participant A person that took part in a monitoring study Behavioral feature A metric computed from raw sensor data quantifying the behavior of a participant. For example, time spent at home computed from location data. These are also known as digital biomarkers Time segment Time segments (or epochs) are the time windows on which RAPIDS extracts behavioral features. For example, you might want to compute participants\u2019 time at home every morning or only during weekends. You define time segments in a CSV file that RAPIDS processes. Time zone A string code like America/New_York that represents a time zone where a device logged data. You can process data collected in single or multiple time zones. Provider A script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community, which are named after its first author like [PHONE_LOCATIONS][DORYAB] . 
config.yaml A YAML file where you can modify parameters to process data streams and behavioral features. This is the heart of RAPIDS and the file that you will modify the most. credentials.yaml A YAML file where you can define credential groups (user, password, host, etc.) if your data stream needs to connect to a database or Web API Participant file(s) A YAML file that links one or more smartphone or wearable devices that a single participant used. RAPIDS needs one file per participant. What can I do with RAPIDS? You can do one or more of these things with RAPIDS: Extract behavioral features from smartphone, Fitbit, and Empatica\u2019s supported data streams Add your own behavioral features (we can include them in RAPIDS if you want to share them with the community) Add support for new data streams if yours cannot be processed by RAPIDS yet Create visualizations for data quality control and feature inspection Extending RAPIDS to organize your analysis and publish a code repository along with your code Hint In order to follow any of the previous tutorials, you will have to Install , Configure , and learn how to Execute RAPIDS. We recommend you follow the Minimal Example tutorial to get familiar with RAPIDS Email us , leave a comment in these docs, create a Github issue or text us in Slack if you have any questions","title":"Overview"},{"location":"setup/overview/#frequently-asked-questions","text":"","title":"Frequently Asked Questions"},{"location":"setup/overview/#general","text":"What exactly is RAPIDS? RAPIDS is a group of configuration files and R and Python scripts that are executed by Snakemake . You can get a copy of RAPIDS by cloning our Github repository. RAPIDS is not a web application or server; all the processing is done in your laptop, server, or computer cluster. How does RAPIDS work? 
You will most of the time only have to modify configuration files in YAML format ( config.yaml , credentials.yaml , and participant files pxx.yaml ), and in CSV format (time zones and time segments). RAPIDS pulls data from different data containers and processes it in steps. The input/output of each step is saved as a CSV file for inspection; you can check the files that are created for each sensor on its documentation page. All data is stored in data/ , and all processing Python and R scripts are stored in src/ . User and File interactions in RAPIDS In the figure below, we represent the interactions between users and files. After a user modifies the configuration files mentioned above, the Snakefile file will search for and execute the Snakemake rules that contain the Python or R scripts necessary to generate or update the required output files (behavioral features, plots, etc.). Interaction diagram between the user, and important files in RAPIDS Data flow in RAPIDS In the figure below, we represent the flow of data in RAPIDS. In broad terms, smartphone and wearable devices log data streams with a certain format to a data container (database, file, etc.). RAPIDS can connect to these containers if it has a format.yaml and a container.[R|py] script used to pull the correct data and mutate it to comply with RAPIDS\u2019 internal data representation. Once the data stream is in RAPIDS, it goes through some basic transformations (scripts), one that assigns a time segment and a time zone to each data row, and another one that creates \u201cepisodes\u201d of data for some sensors that need it (like screen, battery, activity recognition, and sleep intraday data). After this, RAPIDS executes the requested PROVIDER script that computes behavioral features per time segment instance. After every feature is computed, they are joined per sensor, per participant, and study. Visualizations are built based on raw data or based on computed features. 
Data stream flow in RAPIDS Is my data private? Absolutely, you are processing your data with your own copy of RAPIDS in your laptop, server, or computer cluster, so neither we nor anyone else can have access to your datasets. Do I need to have coding skills to use RAPIDS? If you want to extract the behavioral features or visualizations that RAPIDS offers out of the box, the answer is no. However, you need to be comfortable running commands in your terminal and familiar with editing YAML files and CSV files. If you want to add support for new data streams or behavioral features, you need to be familiar with R or Python. Is RAPIDS open-source or free? Yes, RAPIDS is both open-source and free. How do I cite RAPIDS? Please refer to our Citation guide ; depending on what parts of RAPIDS you used, we also ask you to cite the work of other authors that shared their work. I have a lot of data, can RAPIDS handle it/ is RAPIDS fast enough? Yes, we use Snakemake under the hood, so you can automatically distribute RAPIDS execution over multiple cores or clusters . RAPIDS processes data per sensor and participant, so it can take advantage of this parallel processing. What are the advantages of using RAPIDS over implementing my own analysis code? We believe RAPIDS can benefit your analysis in several ways: RAPIDS has more than 250 behavioral features available, many of them tested and used by other researchers. RAPIDS can extract features in dynamic time segments (for example, every x minutes, x hours, x days, x weeks, x months, etc.). This is handy because you don\u2019t have to deal with time zones, daylight saving changes, or date arithmetic. Your analysis is less prone to errors. Every participant sensor dataset is analyzed in the same way and isolated from each other. If you have lots of data, out-of-the-box parallel execution will speed up your analysis and if your computer crashes, RAPIDS will start from where it left off. 
You can publish your analysis code along with your papers and be sure it will run exactly as it does in your computer. You can still add your own behavioral features and data streams if you need to, and the community will be able to reuse your work.","title":"General"},{"location":"setup/overview/#data-streams","text":"Can I process smartphone data collected with Beiwe, PurpleRobot, or app X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. Can I process data from Oura Rings, Actigraphs, or wearable X? The only wearables we support at the moment are Empatica and Fitbit. However, get in touch if you need to process data from a different wearable. We have limited resources so we add support for different devices on an as-needed basis, but we would be happy to collaborate with you to add new wearables. Email us , create a Github issue or text us in Slack if you have any questions. Can I process smartphone or wearable data stored in PostgreSQL, Oracle, SQLite, CSV files, or data container X? Yes, but you need to add a new data stream to RAPIDS (a new format.yaml and container script in R or Python). Follow this tutorial . If you are processing data streams we already support like AWARE, Fitbit, or Empatica and are just connecting to a different container; you can reuse their format.yaml and only implement a new container script. Email us , create a Github issue or text us in Slack if you have any questions. If you do so, let us know so we can integrate your work into RAPIDS. I have participants that live in different time zones and some that travel; can RAPIDS handle this? Yes, RAPIDS can handle single or multiple timezones per participant. You can use time zone data collected by smartphones or collected by hand. 
Some of my participants used more than one device during my study; can RAPIDS handle this? Yes, you can link more than one smartphone or wearable device to a single participant. RAPIDS will merge them and sort them automatically. Some of my participants switched from Android to iOS or vice-versa during my study; can RAPIDS handle this? Yes, data from multiple smartphones can be linked to a single participant. All iOS data is converted to Android data before merging it.","title":"Data Streams"},{"location":"setup/overview/#extending-rapids","text":"Can I add my own behavioral features/digital biomarkers? Yes, you can implement your own features in R or Python following this tutorial Can I extract behavioral features based on two or more sensors? Yes, we do this for PHONE_DATA_YIELD (combines all phone sensors), PHONE_LOCATIONS (combines location and data yield data), PHONE_APPLICATIONS_BACKGROUND (combines screen and app usage data), and FITBIT_INTRADAY_STEPS (combines Fitbit and sleep and step data). However, we haven\u2019t come up with a user-friendly way to configure this, and currently, we join sensors on a case-by-case basis. This is mainly because not enough users have needed this functionality so far. Get in touch, and we can set it up together; the more use cases we are aware of, the easier it will be to integrate this into RAPIDS. I know how to program in Python or R but not both. Can I still use or extend RAPIDS? Yes, you don\u2019t need to write any code to use RAPIDS out of the box. If you need to add support for new data streams or behavioral features you can use scripts in either language. I have scripts that clean raw data from X sensor, can I use them with RAPIDS? Yes, you can add them as a [MUTATION][SCRIPT] in the format.yaml of the data stream you are using. 
You will add a main function that will receive a data frame with the raw data for that sensor that in turn will be used to compute behavioral features.","title":"Extending RAPIDS"},{"location":"snippets/aware_format/","text":"If you collected sensor data with the vanilla (original) AWARE mobile clients, you shouldn\u2019t need to modify this format (described below). Remember that a format maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs . The yaml file that describes the format of this data stream is at: src/data/streams/aware_csv/format.yaml For some sensors, we need to transform iOS data into Android format; you can refer to OS complex mapping to learn how this works. Hint The mappings in this stream (RAPIDS/Stream) are the same names because AWARE data was the first stream RAPIDS supported, meaning that it considers AWARE column names the default. PHONE_ACCELEROMETER ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_VALUES_0 double_values_0 DOUBLE_VALUES_1 double_values_1 DOUBLE_VALUES_2 double_values_2 MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_ACTIVITY_RECOGNITION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME activity_name ACTIVITY_TYPE activity_type CONFIDENCE confidence MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id ACTIVITY_NAME FLAG_TO_MUTATE ACTIVITY_TYPE FLAG_TO_MUTATE CONFIDENCE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column ACTIVITIES activities CONFIDENCE confidence SCRIPTS src/data/streams/mutations/phone/aware/activity_recogniton_ios_unification.R Note For RAPIDS columns of ACTIVITY_NAME and ACTIVITY_TYPE : if stream\u2019s activities field is automotive, set ACTIVITY_NAME = in_vehicle and ACTIVITY_TYPE = 0 if stream\u2019s activities field is 
cycling, set ACTIVITY_NAME = on_bicycle and ACTIVITY_TYPE = 1 if stream\u2019s activities field is walking, set ACTIVITY_NAME = walking and ACTIVITY_TYPE = 7 if stream\u2019s activities field is running, set ACTIVITY_NAME = running and ACTIVITY_TYPE = 8 if stream\u2019s activities field is stationary, set ACTIVITY_NAME = still and ACTIVITY_TYPE = 3 if stream\u2019s activities field is unknown, set ACTIVITY_NAME = unknown and ACTIVITY_TYPE = 4 For RAPIDS CONFIDENCE column: if stream\u2019s confidence field is 0, set CONFIDENCE = 0 if stream\u2019s confidence field is 1, set CONFIDENCE = 50 if stream\u2019s confidence field is 2, set CONFIDENCE = 100 PHONE_APPLICATIONS_CRASHES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name APPLICATION_VERSION application_version ERROR_SHORT error_short ERROR_LONG error_long ERROR_CONDITION error_condition IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_FOREGROUND ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name IS_SYSTEM_APP is_system_app MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_APPLICATIONS_NOTIFICATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name APPLICATION_NAME application_name TEXT text SOUND sound VIBRATE vibrate DEFAULTS defaults FLAGS flags MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_BATTERY ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS battery_status BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Client V1 RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BATTERY_STATUS FLAG_TO_MUTATE BATTERY_LEVEL battery_level BATTERY_SCALE battery_scale MUTATION COLUMN_MAPPINGS Script column Stream column BATTERY_STATUS battery_status SCRIPTS src/data/streams/mutations/phone/aware/battery_ios_unification.R Note For RAPIDS BATTERY_STATUS column: if stream\u2019s battery_status field is 3, set BATTERY_STATUS = 5 (full status) if stream\u2019s battery_status field is 1, set BATTERY_STATUS = 3 (discharge) IOS Client V2 Same as ANDROID PHONE_BLUETOOTH ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id BT_ADDRESS bt_address BT_NAME bt_name BT_RSSI bt_rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android). PHONE_CALLS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE call_type CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id CALL_TYPE FLAG_TO_MUTATE CALL_DURATION call_duration TRACE trace MUTATION COLUMN_MAPPINGS Script column Stream column CALL_TYPE call_type SCRIPTS src/data/streams/mutations/phone/aware/calls_ios_unification.R Note We transform iOS call logs into Android\u2019s format. iOS stores call status: 1=incoming, 2=connected, 3=dialing, 4=disconnected, as opposed to Android\u2019s events: 1=incoming, 2=outgoing, 3=missed. 
We follow this algorithm to convert iOS call data (there are some inaccuracies in the way we handle sequences, see new rules below): Search for the disconnected (4) status as it is common to all calls Group all events that preceded every status 4 We convert every 1,2,4 (or 2,1,4) sequence to an incoming call We convert every 3,2,4 (or 2,3,4) sequence to an outgoing call We convert every 1,4 or 3,4 sequence to a missed call (either incoming or outgoing) We set the duration of the call to be the sum of every status (dialing/ringing to hangup) as opposed to the duration of the last status (pick up to hang up) Tested with an Android (OnePlus 7T) and an iPhone XR Call type Android (duration) iOS (duration) New Rule Outgoing missed ended by me 2 (0) 3,4 (0,X) 3,4 is converted to 2 with duration 0 Outgoing missed ended by them 2(0) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2* Incoming missed ended by me NA** 1,4 (0,X) 1,4 is converted to 3 with duration 0 Incoming missed ended by them 3(0) 1,4 (0,X) 1,4 is converted to 3 with duration 0 Outgoing answered 2(X excluding dialing time) 3,2,4 (0,X,X2) 3,2,4 is converted to 2 with duration X2 Incoming answered 1(X excluding dialing time) 1,2,4 (0,X,X2) 1,2,4 is converted to 1 with duration X2 .* There is no way to differentiate an outgoing missed call ended by them from an outgoing answered call because the phone goes directly to voice mail and it counts as call time (essentially the voice mail answered). .** Android does not record incoming missed calls ended by the participant, just those ended by the person calling or ignored by the participant. 
PHONE_CONVERSATION ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_ENERGY double_energy INFERENCE inference DOUBLE_CONVO_START FLAG_TO_MUTATE DOUBLE_CONVO_END FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column DOUBLE_CONVO_START double_convo_start DOUBLE_CONVO_END double_convo_end SCRIPTS src/data/streams/mutations/phone/aware/conversation_ios_timestamp.R Note For RAPIDS columns of DOUBLE_CONVO_START and DOUBLE_CONVO_END : if stream\u2019s double_convo_start field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_START = 1000 * double_convo_start . if stream\u2019s double_convo_end field is smaller than 9999999999, it is in seconds instead of milliseconds. Set DOUBLE_CONVO_END = 1000 * double_convo_end . PHONE_KEYBOARD ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id PACKAGE_NAME package_name BEFORE_TEXT before_text CURRENT_TEXT current_text IS_PASSWORD is_password MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_LIGHT ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LIGHT_LUX double_light_lux ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. 
PHONE_LOCATIONS ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id DOUBLE_LATITUDE double_latitude DOUBLE_LONGITUDE double_longitude DOUBLE_BEARING double_bearing DOUBLE_SPEED double_speed DOUBLE_ALTITUDE double_altitude PROVIDER provider ACCURACY accuracy MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_LOG ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id LOG_MESSAGE log_message MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_MESSAGES ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MESSAGE_TYPE message_type TRACE trace MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS This sensor is not supported by iOS devices. PHONE_SCREEN ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS screen_status MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SCREEN_STATUS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column SCREEN_STATUS screen_status SCRIPTS src/data/streams/mutations/phone/aware/screen_ios_unification.R Note For SCREEN_STATUS RAPIDS column: if stream\u2019s screen_status field is 2 (lock episode), set SCREEN_STATUS = 0 (off episode). 
PHONE_WIFI_CONNECTED ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id MAC_ADDRESS mac_address SSID ssid BSSID bssid MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Same as ANDROID PHONE_WIFI_VISIBLE ANDROID RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP timestamp DEVICE_ID device_id SSID ssid BSSID bssid SECURITY security FREQUENCY frequency RSSI rssi MUTATION COLUMN_MAPPINGS (None) SCRIPTS (None) IOS Only old iOS versions supported this sensor (same mapping as Android).","title":"Aware format"},{"location":"snippets/database/","text":"Setting up a DATABASE_GROUP and its connection credentials. If you haven\u2019t done so, create an empty file called credentials.yaml in your RAPIDS root directory: Add the following lines to credentials.yaml and replace your database-specific credentials (user, password, host, and database): MY_GROUP : database : MY_DATABASE host : MY_HOST password : MY_PASSWORD port : 3306 user : MY_USER Notes The label [MY_GROUP] is arbitrary but it has to match the [DATABASE_GROUP] attribute of the data stream you choose to use. Indentation matters You can have more than one credentials group in credentials.yaml Upgrading from ./.env from RAPIDS 0.x In RAPIDS versions 0.x, database credentials were stored in a ./.env file. If you are migrating from that type of file, you have two options: Migrate your credentials by hand: change .env format [ MY_GROUP ] user=MY_USER password=MY_PASSWORD host=MY_HOST port=3306 database=MY_DATABASE to credentials.yaml format MY_GROUP : user : MY_USER password : MY_PASSWORD host : MY_HOST port : 3306 database : MY_DATABASE Use the migration script we provide (make sure your conda environment is active): python tools / update_format_env . py Connecting to localhost (host machine) from inside our docker container. 
If you are using RAPIDS\u2019 docker container and Docker-for-mac or Docker-for-Windows 18.03+, you can connect to a MySQL database in your host machine using host.docker.internal instead of 127.0.0.1 or localhost . In a Linux host, you need to run our docker container using docker run --network=\"host\" -d moshiresearch/rapids:latest and then 127.0.0.1 will point to your host machine.","title":"Database"},{"location":"snippets/feature_introduction_example/","text":"Sensor section Each sensor (accelerometer, screen, etc.) of every supported device (smartphone, Fitbit, etc.) has a section in the config.yaml with parameters and feature PROVIDERS . Sensor Parameters. Each sensor section has one or more parameters. These are parameters that affect different aspects of how the raw data is pulled and processed. The CONTAINER parameter exists for every sensor, but some sensors will have extra parameters like [PHONE_LOCATIONS] . We explain these parameters in a table at the top of each sensor documentation page. Sensor Providers Each object in this list represents a feature PROVIDER . Each sensor can have zero, one, or more providers. A PROVIDER is a script that creates behavioral features for a specific sensor. Providers are created by the core RAPIDS team or by the community; community providers are named after their first author, like [PHONE_LOCATIONS][DORYAB] . In this example, there are two accelerometer feature providers RAPIDS and PANDA . PROVIDER Parameters Each PROVIDER has parameters that affect the computation of the behavioral features it offers. These parameters include at least a [COMPUTE] flag that you switch to True to extract a provider\u2019s behavioral features. We explain every provider\u2019s parameter in a table under the Parameters description heading on each provider documentation page. PROVIDER Features Each PROVIDER offers a set of behavioral features. These features are grouped in an array for some providers, like those for the RAPIDS provider. 
For others, they are grouped in a collection of arrays, like those for the PANDA provider. In either case, you can delete the features you are not interested in, and they will not be included in the sensor\u2019s output feature file. We explain each behavioral feature in a table under the Features description heading on each provider documentation page. PROVIDER script Each PROVIDER has a SRC_SCRIPT that points to the script implementing its behavioral features. It has to be a relative path from RAPIDS\u2019 root folder and the script\u2019s parent folder should be named after the provider, e.g. panda .","title":"Feature introduction example"},{"location":"snippets/jsonfitbit_format/","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . This file is at: src/data/streams/fitbitjson_csv/format.yaml If you want RAPIDS to process Fitbit sensor data using this stream, you will need to map DEVICE_ID and JSON_FITBIT_COLUMN to your own raw data columns inside each sensor section in format.yaml . FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESOUTOFRANGE FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESFATBURN FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESCARDIO FLAG_TO_MUTATE HEARTRATE_DAILY_CALORIESPEAK FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id HEARTRATE FLAG_TO_MUTATE HEARTRATE_ZONE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS 
src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py Note All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the raw data RAPIDS expects for this data stream device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1200.6102,\u201dmax\u201d:88,\u201dmin\u201d:31,\u201dminutes\u201d:1058,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:760.3020,\u201dmax\u201d:120,\u201dmin\u201d:86,\u201dminutes\u201d:366,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:15.2048,\u201dmax\u201d:146,\u201dmin\u201d:120,\u201dminutes\u201d:2,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:72}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:68},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:67},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:67},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:1100.1120,\u201dmax\u201d:89,\u201dmin\u201d:30,\u201dminutes\u201d:921,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:660.0012,\u201dmax\u201d:118,\u201dmin\u201d:82,\u201dminutes\u201d:361,\u201dname\u201d:\u201dFat 
Burn\u201d},{\u201ccaloriesOut\u201d:23.7088,\u201dmax\u201d:142,\u201dmin\u201d:108,\u201dminutes\u201d:3,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:221,\u201dmin\u201d:148,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:70}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:77},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:75},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:73},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201cactivities-heart\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:{\u201ccustomHeartRateZones\u201d:[],\u201dheartRateZones\u201d:[{\u201ccaloriesOut\u201d:750.3615,\u201dmax\u201d:77,\u201dmin\u201d:30,\u201dminutes\u201d:851,\u201dname\u201d:\u201dOut of Range\u201d},{\u201ccaloriesOut\u201d:734.1516,\u201dmax\u201d:107,\u201dmin\u201d:77,\u201dminutes\u201d:550,\u201dname\u201d:\u201dFat Burn\u201d},{\u201ccaloriesOut\u201d:131.8579,\u201dmax\u201d:130,\u201dmin\u201d:107,\u201dminutes\u201d:29,\u201dname\u201d:\u201dCardio\u201d},{\u201ccaloriesOut\u201d:0,\u201dmax\u201d:220,\u201dmin\u201d:130,\u201dminutes\u201d:0,\u201dname\u201d:\u201dPeak\u201d}],\u201drestingHeartRate\u201d:69}}],\u201dactivities-heart-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:90},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:89},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:88},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE LOCAL_START_DATE_TIME FLAG_TO_MUTATE LOCAL_END_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id EFFICIENCY FLAG_TO_MUTATE MINUTES_AFTER_WAKEUP 
FLAG_TO_MUTATE MINUTES_ASLEEP FLAG_TO_MUTATE MINUTES_AWAKE FLAG_TO_MUTATE MINUTES_TO_FALL_ASLEEP FLAG_TO_MUTATE MINUTES_IN_BED FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_summary_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], 
\u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME FLAG_TO_MUTATE DEVICE_ID device_id TYPE_EPISODE_ID FLAG_TO_MUTATE DURATION FLAG_TO_MUTATE IS_MAIN_SLEEP FLAG_TO_MUTATE TYPE FLAG_TO_MUTATE LEVEL FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py Note Fitbit API has two versions for sleep data, v1 and v1.2, we support both. All columns except DEVICE_ID are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:3600000,\u201defficiency\u201d:92,\u201dendTime\u201d:\u201d2020-10-10T16:37:00.000\u201d,\u201dinfoCode\u201d:2,\u201disMainSleep\u201d:false,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-10T15:37:30.000\u201d,\u201dlevel\u201d:\u201dasleep\u201d,\u201dseconds\u201d:660},{\u201cdateTime\u201d:\u201d2020-10-10T15:48:30.000\u201d,\u201dlevel\u201d:\u201drestless\u201d,\u201dseconds\u201d:60},\u2026], \u201csummary\u201d:{\u201casleep\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:56},\u201dawake\u201d:{\u201ccount\u201d:0,\u201dminutes\u201d:0},\u201drestless\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:4}}},\u201dlogId\u201d:26315914306,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:55,\u201dminutesAwake\u201d:5,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T15:36:30.000\u201d,\u201dtimeInBed\u201d:60,\u201dtype\u201d:\u201dclassic\u201d},{\u201cdateOfSleep\u201d:\u201d2020-10-10\u201d,\u201dduration\u201d:22980000,\u201defficiency\u201d:88,\u201dendTime\u201d:\u201d2020-10-10T08:10:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:420},{\u201cdateTime\u201d:\u201d2020-10-10T01:53:30.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:1230},{\u201cdateTime\u201d:\u201d2020-10-10T02:14:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:360},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:3,\u201dminutes\u201d:92,\u201dthirtyDayAvgMinutes\u201d:0},\u201dlight\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:193,\u201dthirtyDayAvgMinutes\u201d:0},\u201drem\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:33,\u201dthirtyDayAvgMinutes\u201d:0},\u201dwake\u201d:{\u201ccount\u201d:28,\u201dminutes\u201d:65,\u201dthirtyDayAvgMinutes\u201d:0}}},\u201dlogId\u201d:26311786557,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:318,\u201dminutesAwake\u201d:65,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-10T01:46:30.000\u201d,\u201dtimeInBed\u201d:383,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:92,\u201dlight\u201d:193,\u201drem\u201d:33,\u201dwake\u201d:65},\u201dtotalMinutesAsleep\u201d:373,\u201dtotalSleepRecords\u201d:2,\u201dtotalTimeInBed\u201d:443}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-11\u201d,\u201dduration\u201d:41640000,\u201defficiency\u201d:89,\u201dendTime\u201d:\u201d2020-10-11T11:47:00.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:450},{\u201cdateTime\u201d:\u201d2020-10-11T00:20:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:870},{\u201cdateTime\u201d:\u201d2020-10-11T00:34:30.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:780},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:52,\u201dthirtyDayAvgMinutes\u201d:62},\u201dlight\u201d:{\u201ccount\u201d:32,\u201dminutes\u201d:442,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:6,\u201dminutes\u201d:68,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:29,\u201dminutes\u201d:132,\u201dthirtyDayAvgMinutes\u201d:94}}},\u201dlogId\u201d:26589710670,\u201dminutesAfterWakeup\u201d:1,\u201dminutesAsleep\u201d:562,\u201dminutesAwake\u201d:132,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-11T00:12:30.000\u201d,\u201dtimeInBed\u201d:694,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:52,\u201dlight\u201d:442,\u201drem\u201d:68,\u201dwake\u201d:132},\u201dtotalMinutesAsleep\u201d:562,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:694}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 {\u201csleep\u201d:[{\u201cdateOfSleep\u201d:\u201d2020-10-12\u201d,\u201dduration\u201d:28980000,\u201defficiency\u201d:93,\u201dendTime\u201d:\u201d2020-10-12T09:34:30.000\u201d,\u201dinfoCode\u201d:0,\u201disMainSleep\u201d:true,\u201dlevels\u201d:{\u201cdata\u201d:[{\u201cdateTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dlevel\u201d:\u201dwake\u201d,\u201dseconds\u201d:600},{\u201cdateTime\u201d:\u201d2020-10-12T01:41:00.000\u201d,\u201dlevel\u201d:\u201dlight\u201d,\u201dseconds\u201d:60},{\u201cdateTime\u201d:\u201d2020-10-12T01:42:00.000\u201d,\u201dlevel\u201d:\u201ddeep\u201d,\u201dseconds\u201d:2340},\u2026], 
\u201csummary\u201d:{\u201cdeep\u201d:{\u201ccount\u201d:4,\u201dminutes\u201d:63,\u201dthirtyDayAvgMinutes\u201d:59},\u201dlight\u201d:{\u201ccount\u201d:27,\u201dminutes\u201d:257,\u201dthirtyDayAvgMinutes\u201d:364},\u201drem\u201d:{\u201ccount\u201d:5,\u201dminutes\u201d:94,\u201dthirtyDayAvgMinutes\u201d:58},\u201dwake\u201d:{\u201ccount\u201d:24,\u201dminutes\u201d:69,\u201dthirtyDayAvgMinutes\u201d:95}}},\u201dlogId\u201d:26589710673,\u201dminutesAfterWakeup\u201d:0,\u201dminutesAsleep\u201d:415,\u201dminutesAwake\u201d:68,\u201dminutesToFallAsleep\u201d:0,\u201dstartTime\u201d:\u201d2020-10-12T01:31:00.000\u201d,\u201dtimeInBed\u201d:483,\u201dtype\u201d:\u201dstages\u201d}],\u201dsummary\u201d:{\u201cstages\u201d:{\u201cdeep\u201d:63,\u201dlight\u201d:257,\u201drem\u201d:94,\u201dwake\u201d:69},\u201dtotalMinutesAsleep\u201d:415,\u201dtotalSleepRecords\u201d:1,\u201dtotalTimeInBed\u201d:483}} FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_summary_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API. 
See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME FLAG_TO_MUTATE STEPS FLAG_TO_MUTATE MUTATION COLUMN_MAPPINGS Script column Stream column JSON_FITBIT_COLUMN fitbit_data SCRIPTS src/data/streams/mutations/fitbit/parse_steps_intraday_json.py Note TIMESTAMP , LOCAL_DATE_TIME , and STEPS are parsed from JSON_FITBIT_COLUMN . 
JSON_FITBIT_COLUMN is a string column containing the JSON objects returned by Fitbit\u2019s API . See an example of the raw data RAPIDS expects for this data stream: Example of the expected raw data device_id fitbit_data a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-07\u201d,\u201dvalue\u201d:\u201d1775\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:5},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:3},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-08\u201d,\u201dvalue\u201d:\u201d3201\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:14},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:11},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:10},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}} a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u201cactivities-steps\u201d:[{\u201cdateTime\u201d:\u201d2020-10-09\u201d,\u201dvalue\u201d:\u201d998\u201d}],\u201dactivities-steps-intraday\u201d:{\u201cdataset\u201d:[{\u201ctime\u201d:\u201d00:00:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:01:00\u201d,\u201dvalue\u201d:0},{\u201ctime\u201d:\u201d00:02:00\u201d,\u201dvalue\u201d:0},\u2026],\u201ddatasetInterval\u201d:1,\u201ddatasetType\u201d:\u201dminute\u201d}}","title":"Jsonfitbit format"},{"location":"snippets/parsedfitbit_format/","text":"The format.yaml maps and transforms columns in your raw data stream to the mandatory columns RAPIDS needs for Fitbit sensors . 
This file is at: src/data/streams/fitbitparsed_mysql/format.yaml If you want to use this stream with your data, modify every sensor in format.yaml to map all columns except TIMESTAMP in [RAPIDS_COLUMN_MAPPINGS] to your raw data column names. All columns are mandatory; however, all except device_id and local_date_time can be empty if you don\u2019t have that data. Just have in mind that some features will be empty if some of these columns are empty. FITBIT_HEARTRATE_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE_DAILY_RESTINGHR heartrate_daily_restinghr HEARTRATE_DAILY_CALORIESOUTOFRANGE heartrate_daily_caloriesoutofrange HEARTRATE_DAILY_CALORIESFATBURN heartrate_daily_caloriesfatburn HEARTRATE_DAILY_CALORIESCARDIO heartrate_daily_caloriescardio HEARTRATE_DAILY_CALORIESPEAK heartrate_daily_caloriespeak MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate_daily_restinghr heartrate_daily_caloriesoutofrange heartrate_daily_caloriesfatburn heartrate_daily_caloriescardio heartrate_daily_caloriespeak a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 72 1200.6102 760.3020 15.2048 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 70 1100.1120 660.0012 23.7088 0 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 69 750.3615 734.1516 131.8579 0 FITBIT_HEARTRATE_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id HEARTRATE heartrate HEARTRATE_ZONE heartrate_zone MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the raw data RAPIDS expects for this data stream device_id local_date_time heartrate heartrate_zone a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 68 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 67 outofrange a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 67 outofrange FITBIT_SLEEP_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time LOCAL_START_DATE_TIME local_start_date_time LOCAL_END_DATE_TIME local_end_date_time DEVICE_ID device_id EFFICIENCY efficiency MINUTES_AFTER_WAKEUP minutes_after_wakeup MINUTES_ASLEEP minutes_asleep MINUTES_AWAKE minutes_awake MINUTES_TO_FALL_ASLEEP minutes_to_fall_asleep MINUTES_IN_BED minutes_in_bed IS_MAIN_SLEEP is_main_sleep TYPE type MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2. We support both but ignore v1\u2019s count_awake , duration_awake , and count_awakenings , count_restless , duration_restless columns. 
Example of the expected raw data device_id local_start_date_time local_end_date_time efficiency minutes_after_wakeup minutes_asleep minutes_awake minutes_to_fall_asleep minutes_in_bed is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 15:36:30 2020-10-10 16:37:00 92 0 55 5 0 60 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-10 01:46:30 2020-10-10 08:10:00 88 0 318 65 0 383 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-11 00:12:30 2020-10-11 11:47:00 89 1 562 132 0 694 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-12 01:31:00 2020-10-12 09:34:30 93 0 415 68 0 483 1 stages FITBIT_SLEEP_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE LOCAL_DATE_TIME local_date_time DEVICE_ID device_id TYPE_EPISODE_ID type_episode_id DURATION duration IS_MAIN_SLEEP is_main_sleep TYPE type LEVEL level MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Fitbit API has two versions for sleep data, v1 and v1.2, we support both. 
Example of the expected raw data device_id type_episode_id local_date_time duration level is_main_sleep type a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:36:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:37:30 660 asleep 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 0 2020-10-10 15:48:30 60 restless 0 classic a748ee1a-1d0b-4ae9-9074-279a2b6ba524 \u2026 \u2026 \u2026 \u2026 \u2026 \u2026 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:46:30 420 light 1 stages a748ee1a-1d0b-4ae9-9074-279a2b6ba524 1 2020-10-10 01:53:30 1230 deep 1 stages FITBIT_STEPS_SUMMARY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 1775 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-08 3201 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-09 998 FITBIT_STEPS_INTRADAY RAPIDS_COLUMN_MAPPINGS RAPIDS column Stream column TIMESTAMP FLAG_TO_MUTATE DEVICE_ID device_id LOCAL_DATE_TIME local_date_time STEPS steps MUTATION COLUMN_MAPPINGS (None) SCRIPTS src/data/streams/mutations/fitbit/add_zero_timestamp.py Note add_zero_timestamp adds an all-zero column called timestamp that will be filled in later in the pipeline by readable_time.R converting LOCAL_DATE_TIME to a unix timestamp taking into account single or multiple time zones. 
Example of the expected raw data device_id local_date_time steps a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:00:00 5 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:01:00 3 a748ee1a-1d0b-4ae9-9074-279a2b6ba524 2020-10-07 00:02:00 0","title":"Parsedfitbit format"},{"location":"visualizations/data-quality-visualizations/","text":"Data Quality Visualizations \u00b6 We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (example01 and example02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). Note Time segments (e.g. daily , morning , etc.) can have multiple instances (day 1, day 2, or morning 1, morning 2, etc.) 1. Histograms of phone data yield \u00b6 RAPIDS provides two histograms that show the number of time segment instances that had a certain ratio of valid yielded minutes and hours , respectively. A valid yielded minute has at least 1 row of data from any smartphone sensor and a valid yielded hour contains at least M valid minutes. These plots can be used as a rough indication of the smartphone monitoring coverage during a study aggregated across all participants. For example, the figure below shows a valid yielded minutes histogram for daily segments and we can infer that the monitoring coverage was very good since almost all segments contain at least 90 to 100% of the expected sensed minutes. Example Click here to see an example of these interactive visualizations in HTML format Histogram of the data yielded minute ratio for a single participant during five time segments (daily, morning, afternoon, evening, and night) 2. Heatmaps of overall data yield \u00b6 These heatmaps are a break down per time segment and per participant of Visualization 1 . 
Heatmap\u2019s rows represent participants, columns represent time segment instances and the cells\u2019 color represent the valid yielded minute or hour ratio for a participant during a time segment instance. As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis can be labelled with the absolute time of the start of each time segment instance or the time delta between the start of each time segment instance minus the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. The figure below shows the heatmap of the valid yielded minute ratio for participants example01 and example02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicate both phones sensed data without interruptions for most days (except for the first and last ones). [ABSOLUTE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants [RELATIVE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants 3. Heatmap of recorded phone sensors \u00b6 In these heatmaps rows represent time segment instances, columns represent minutes since the start of a time segment instance, and cells\u2019 color shows the number of phone sensors that logged at least one row of data during those 1-minute windows. RAPIDS creates a plot per participant and per time segment and can be used as a rough indication of whether time-based sensors were following their sensing schedule (e.g. if location was being sensed every 2 minutes). The figure below shows this heatmap for phone sensors collected by participant example01 in daily time segments from Apr 23 rd 2020 to May 4 th 2020. 
We can infer that for most of the monitoring time, the participant\u2019s phone logged data from at least 7 sensors each minute. Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the recorded phone sensors per minute and per time segment of a single participant 4. Heatmap of sensor row count \u00b6 These heatmaps are a per-sensor breakdown of Visualization 1 and Visualization 2 . Note that the second row (ratio of valid yielded minutes) of this heatmap matches the respective participant (bottom) row of the screenshot in Visualization 2. In these heatmaps rows represent phone or Fitbit sensors, columns represent time segment instances and cells\u2019 color shows the normalized (0 to 1) row count of each sensor within a time segment instance. RAPIDS creates one heatmap per participant and they can be used to judge missing data on a per participant and per sensor basis. The figure below shows data for 14 phone sensors (including data yield) of example01\u2019s daily segments. From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23 rd ), peaks of location coordinates on Apr 26 th and Apr 30 th , and no sent or received SMS except for Apr 23 rd , Apr 29 th and Apr 30 th (unlabeled row between screen and locations). 
Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the sensor row count per time segment of a single participant","title":"Data Quality"},{"location":"visualizations/data-quality-visualizations/#data-quality-visualizations","text":"We showcase these visualizations with a test study that collected 14 days of smartphone and Fitbit data from two participants (example01 and example02) and extracted behavioral features within five time segments (daily, morning, afternoon, evening, and night). Note Time segments (e.g. daily , morning , etc.) can have multiple instances (day 1, day 2, or morning 1, morning 2, etc.)","title":"Data Quality Visualizations"},{"location":"visualizations/data-quality-visualizations/#1-histograms-of-phone-data-yield","text":"RAPIDS provides two histograms that show the number of time segment instances that had a certain ratio of valid yielded minutes and hours , respectively. A valid yielded minute has at least 1 row of data from any smartphone sensor and a valid yielded hour contains at least M valid minutes. These plots can be used as a rough indication of the smartphone monitoring coverage during a study aggregated across all participants. For example, the figure below shows a valid yielded minutes histogram for daily segments and we can infer that the monitoring coverage was very good since almost all segments contain at least 90 to 100% of the expected sensed minutes. Example Click here to see an example of these interactive visualizations in HTML format Histogram of the data yielded minute ratio for a single participant during five time segments (daily, morning, afternoon, evening, and night)","title":"1. Histograms of phone data yield"},{"location":"visualizations/data-quality-visualizations/#2-heatmaps-of-overall-data-yield","text":"These heatmaps are a break down per time segment and per participant of Visualization 1 . 
Heatmap\u2019s rows represent participants, columns represent time segment instances and the cells\u2019 color represent the valid yielded minute or hour ratio for a participant during a time segment instance. As different participants might join a study on different dates and time segments can be of any length and start on any day, the x-axis can be labelled with the absolute time of the start of each time segment instance or the time delta between the start of each time segment instance minus the start of the first instance. These plots provide a quick study overview of the monitoring coverage per person and per time segment. The figure below shows the heatmap of the valid yielded minute ratio for participants example01 and example02 on daily segments and, as we inferred from the previous histogram, the lighter (yellow) color on most time segment instances (cells) indicate both phones sensed data without interruptions for most days (except for the first and last ones). [ABSOLUTE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants [RELATIVE_TIME] Example Click here to see an example of these interactive visualizations in HTML format Overall compliance heatmap for all participants","title":"2. Heatmaps of overall data yield"},{"location":"visualizations/data-quality-visualizations/#3-heatmap-of-recorded-phone-sensors","text":"In these heatmaps rows represent time segment instances, columns represent minutes since the start of a time segment instance, and cells\u2019 color shows the number of phone sensors that logged at least one row of data during those 1-minute windows. RAPIDS creates a plot per participant and per time segment and can be used as a rough indication of whether time-based sensors were following their sensing schedule (e.g. if location was being sensed every 2 minutes). 
The figure below shows this heatmap for phone sensors collected by participant example01 in daily time segments from Apr 23 rd 2020 to May 4 th 2020. We can infer that for most of the monitoring time, the participant\u2019s phone logged data from at least 7 sensors each minute. Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the recorded phone sensors per minute and per time segment of a single participant","title":"3. Heatmap of recorded phone sensors"},{"location":"visualizations/data-quality-visualizations/#4-heatmap-of-sensor-row-count","text":"These heatmaps are a per-sensor breakdown of Visualization 1 and Visualization 2 . Note that the second row (ratio of valid yielded minutes) of this heatmap matches the respective participant (bottom) row of the screenshot in Visualization 2. In these heatmaps rows represent phone or Fitbit sensors, columns represent time segment instances and cells\u2019 color shows the normalized (0 to 1) row count of each sensor within a time segment instance. RAPIDS creates one heatmap per participant and they can be used to judge missing data on a per participant and per sensor basis. The figure below shows data for 14 phone sensors (including data yield) of example01\u2019s daily segments. From the top two rows, we can see that the phone was sensing data for most of the monitoring period (as suggested by Figure 3 and Figure 4). We can also infer how phone usage influenced the different sensor streams; there are peaks of screen events during the first day (Apr 23 rd ), peaks of location coordinates on Apr 26 th and Apr 30 th , and no sent or received SMS except for Apr 23 rd , Apr 29 th and Apr 30 th (unlabeled row between screen and locations). Example Click here to see an example of these interactive visualizations in HTML format Heatmap of the sensor row count per time segment of a single participant","title":"4. 
Heatmap of sensor row count"},{"location":"visualizations/feature-visualizations/","text":"Feature Visualizations \u00b6 1. Heatmap Correlation Matrix \u00b6 Columns and rows are the behavioral features computed in RAPIDS, cells\u2019 color represents the correlation coefficient between all days of data for every pair of features of all participants. The user can specify a minimum number of observations ( time segment instances) required to compute the correlation between two features using the MIN_ROWS_RATIO parameter (0.5 by default) and the correlation method (Pearson, Spearman or Kendall) with the CORR_METHOD parameter. In addition, this plot can be configured to only display correlation coefficients above a threshold using the CORR_THRESHOLD parameter (0.1 by default). Example Click here to see an example of these interactive visualizations in HTML format Correlation matrix heatmap for all the features of all participants","title":"Features"},{"location":"visualizations/feature-visualizations/#feature-visualizations","text":"","title":"Feature Visualizations"},{"location":"visualizations/feature-visualizations/#1-heatmap-correlation-matrix","text":"Columns and rows are the behavioral features computed in RAPIDS, cells\u2019 color represents the correlation coefficient between all days of data for every pair of features of all participants. The user can specify a minimum number of observations ( time segment instances) required to compute the correlation between two features using the MIN_ROWS_RATIO parameter (0.5 by default) and the correlation method (Pearson, Spearman or Kendall) with the CORR_METHOD parameter. In addition, this plot can be configured to only display correlation coefficients above a threshold using the CORR_THRESHOLD parameter (0.1 by default). Example Click here to see an example of these interactive visualizations in HTML format Correlation matrix heatmap for all the features of all participants","title":"1. 
Heatmap Correlation Matrix"},{"location":"workflow-examples/analysis/","text":"Analysis Workflow Example \u00b6 TL;DR In addition to using RAPIDS to extract behavioral features and create plots, you can structure your data analysis within RAPIDS (i.e. cleaning your features and creating ML/statistical models) We include an analysis example in RAPIDS that covers raw data processing, cleaning, feature extraction, machine learning modeling, and evaluation Use this example as a guide to structure your own analysis within RAPIDS RAPIDS analysis workflows are compatible with your favorite data science tools and libraries RAPIDS analysis workflows are reproducible and we encourage you to publish them along with your research papers Why should I integrate my analysis in RAPIDS? \u00b6 Even though the bulk of RAPIDS current functionality is related to the computation of behavioral features, we recommend RAPIDS as a complementary tool to create a mobile data analysis workflow. This is because the cookiecutter data science file organization guidelines, the use of Snakemake, the provided behavioral features, and the reproducible R and Python development environments allow researchers to divide an analysis workflow into small parts that can be audited, shared in an online repository, reproduced in other computers, and understood by other people as they follow a familiar and consistent structure. We believe these advantages outweigh the time needed to learn how to create these workflows in RAPIDS. We clarify that to create analysis workflows in RAPIDS, researchers can still use any data manipulation tools, editors, libraries or languages they are already familiar with. RAPIDS is meant to be the final destination of analysis code that was developed in interactive notebooks or stand-alone scripts. 
For example, a user can compute call and location features using RAPIDS, then, they can use Jupyter notebooks to explore feature cleaning approaches and once the cleaning code is final, it can be moved to RAPIDS as a new step in the pipeline. In turn, the output of this cleaning step can be used to explore machine learning models and once a model is finished, it can also be transferred to RAPIDS as a step of its own. The idea is that when it is time to publish a piece of research, a RAPIDS workflow can be shared in a public repository as is. In the following sections we share an example of how we structured an analysis workflow in RAPIDS. Analysis workflow structure \u00b6 To accurately reflect the complexity of a real-world modeling scenario, we decided not to oversimplify this example. Importantly, every step in this example follows a basic structure: an input file and parameters are manipulated by an R or Python script that saves the results to an output file. Input files, parameters, output files and scripts are grouped into Snakemake rules that are described in smk files in the rules folder (we point the reader to the relevant rule(s) of each step). Researchers can use these rules and scripts as a guide to create their own as it is expected every modeling project will have different requirements, data and goals but ultimately most follow a similar chained pattern. Hint The example\u2019s config file is example_profile/example_config.yaml and its Snakefile is in example_profile/Snakefile . The config file is already configured to process the sensor data as explained in Analysis workflow modules . Description of the study modeled in our analysis workflow example \u00b6 Our example is based on a hypothetical study that recruited 2 participants that underwent surgery and collected mobile data for at least one week before and one week after the procedure. 
Participants wore a Fitbit device and installed the AWARE client in their personal Android and iOS smartphones to collect mobile data 24/7. In addition, participants completed daily severity ratings of 12 common symptoms on a scale from 0 to 10 that we summed up into a daily symptom burden score. The goal of this workflow is to find out if we can predict the daily symptom burden score of a participant. Thus, we framed this question as a binary classification problem with two classes, high and low symptom burden based on the scores above and below average of each participant. We also want to compare the performance of individual (personalized) models vs a population model. In total, our example workflow has nine steps that are in charge of sensor data preprocessing, feature extraction, feature cleaning, machine learning model training and model evaluation (see figure below). We ship this workflow with RAPIDS and share files with test data in an Open Science Framework repository. Modules of RAPIDS example workflow, from raw data to model evaluation Configure and run the analysis workflow example \u00b6 Install RAPIDS Unzip the CSV files inside rapids_example_csv.zip in data/external/example_workflow/*.csv . Create the participant files for this example by running: ./rapids -j1 create_example_participant_files Run the example pipeline with: ./rapids -j1 --profile example_profile Note you will see a lot of warning messages, you can ignore them since they happen because we ran ML algorithms with a small fake dataset. Modules of our analysis workflow example \u00b6 1. 
Feature extraction We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 237 daily sensor features over 12 days per participant. 2. Extract demographic data. It is common to have demographic data in addition to mobile and target (ground truth) data. In this example we include participants\u2019 age, gender and the number of days they spent in hospital after their surgery as features in our model. We extract these three columns from the data/external/example_workflow/participant_info.csv file. As these three features remain the same within participants, they are used only on the population model. Refer to the demographic_features rule in rules/models.smk . 3. Create target labels. The two classes for our machine learning binary classification problem are high and low symptom burden. Target values are already stored in the data/external/example_workflow/participant_target.csv file. A new rule/script can be created if further manipulation is necessary. Refer to the parse_targets rule in rules/models.smk . 4. Feature merging. These daily features are stored on a CSV file per sensor, a CSV file per participant, and a CSV file including all features from all participants (in every case each column represents a feature and each row represents a day). Refer to the merge_sensor_features_for_individual_participants and merge_sensor_features_for_all_participants rules in rules/features.smk . 5. Data visualization. 
At this point the user can use the five plots RAPIDS provides (or implement new ones) to explore and understand the quality of the raw data and extracted features and decide what sensors, days, or participants to include and exclude. Refer to rules/reports.smk to find the rules that generate these plots. 6. Feature cleaning. In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in example_profile/example_config.yaml . After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 107 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stop researchers from collecting the same number of sensors as in Android phones. Feature cleaning for the individual models is done in the clean_sensor_features_for_individual_participants rule and for the population model in the clean_sensor_features_for_all_participants rule in rules/models.smk . 7. Merge features and targets. In this step we merge the cleaned features and target labels for our individual models in the merge_features_and_targets_for_individual_model rule in rules/models.smk . Additionally, we merge the cleaned features, target labels, and demographic features of our two participants for the population model in the merge_features_and_targets_for_population_model rule in rules/models.smk . These two merged files are the input for our individual and population models. 8. Modelling. 
This stage has three phases: model building, training and evaluation. In the building phase we impute, normalize and oversample our dataset. Missing numeric values in each column are imputed with their mean and we impute missing categorical values with their mode. We normalize each numeric column with one of three strategies (min-max, z-score, and scikit-learn package\u2019s robust scaler) and we one-hot encode each categorical feature as a numerical array. We oversample our imbalanced dataset using SMOTE (Synthetic Minority Over-sampling Technique) or a Random Over sampler from scikit-learn. All these parameters are exposed in example_profile/example_config.yaml . In the training phase, we create eight models: logistic regression, k-nearest neighbors, support vector machine, decision tree, random forest, gradient boosting classifier, extreme gradient boosting classifier and a light gradient boosting machine. We cross-validate each model with an inner cycle to tune hyper-parameters based on the Macro F1 score and an outer cycle to predict the test set on a model with the best hyper-parameters. Both cross-validation cycles use a leave-one-out strategy. Parameters for each model like weights and learning rates are exposed in example_profile/example_config.yaml . Finally, in the evaluation phase we compute the accuracy, Macro F1, kappa, area under the curve and per class precision, recall and F1 score of all folds of the outer cross-validation cycle. Refer to the modelling_for_individual_participants rule for the individual modeling and to the modelling_for_all_participants rule for the population modeling, both in rules/models.smk . 9. Compute model baselines. We create three baselines to evaluate our classification models. First, a majority classifier that labels each test sample with the majority class of our training data. 
Second, a random weighted classifier that predicts each test observation sampling at random from a binomial distribution based on the ratio of our target labels. Third, a decision tree classifier based solely on the demographic features of each participant. As we do not have demographic features for individual model, this baseline is only available for population model. Our baseline metrics (e.g. accuracy, precision, etc.) are saved into a CSV file, ready to be compared to our modeling results. Refer to the baselines_for_individual_model rule for the individual model baselines and to the baselines_for_population_model rule for population model baselines, both in rules/models.smk .","title":"Complete Example"},{"location":"workflow-examples/analysis/#analysis-workflow-example","text":"TL;DR In addition to using RAPIDS to extract behavioral features and create plots, you can structure your data analysis within RAPIDS (i.e. cleaning your features and creating ML/statistical models) We include an analysis example in RAPIDS that covers raw data processing, cleaning, feature extraction, machine learning modeling, and evaluation Use this example as a guide to structure your own analysis within RAPIDS RAPIDS analysis workflows are compatible with your favorite data science tools and libraries RAPIDS analysis workflows are reproducible and we encourage you to publish them along with your research papers","title":"Analysis Workflow Example"},{"location":"workflow-examples/analysis/#why-should-i-integrate-my-analysis-in-rapids","text":"Even though the bulk of RAPIDS current functionality is related to the computation of behavioral features, we recommend RAPIDS as a complementary tool to create a mobile data analysis workflow. 
This is because the cookiecutter data science file organization guidelines, the use of Snakemake, the provided behavioral features, and the reproducible R and Python development environments allow researchers to divide an analysis workflow into small parts that can be audited, shared in an online repository, reproduced in other computers, and understood by other people as they follow a familiar and consistent structure. We believe these advantages outweigh the time needed to learn how to create these workflows in RAPIDS. We clarify that to create analysis workflows in RAPIDS, researchers can still use any data manipulation tools, editors, libraries or languages they are already familiar with. RAPIDS is meant to be the final destination of analysis code that was developed in interactive notebooks or stand-alone scripts. For example, a user can compute call and location features using RAPIDS, then, they can use Jupyter notebooks to explore feature cleaning approaches and once the cleaning code is final, it can be moved to RAPIDS as a new step in the pipeline. In turn, the output of this cleaning step can be used to explore machine learning models and once a model is finished, it can also be transferred to RAPIDS as a step of its own. The idea is that when it is time to publish a piece of research, a RAPIDS workflow can be shared in a public repository as is. In the following sections we share an example of how we structured an analysis workflow in RAPIDS.","title":"Why should I integrate my analysis in RAPIDS?"},{"location":"workflow-examples/analysis/#analysis-workflow-structure","text":"To accurately reflect the complexity of a real-world modeling scenario, we decided not to oversimplify this example. Importantly, every step in this example follows a basic structure: an input file and parameters are manipulated by an R or Python script that saves the results to an output file. 
Input files, parameters, output files and scripts are grouped into Snakemake rules that are described on smk files in the rules folder (we point the reader to the relevant rule(s) of each step). Researchers can use these rules and scripts as a guide to create their own as it is expected every modeling project will have different requirements, data and goals but ultimately most follow a similar chained pattern. Hint The example\u2019s config file is example_profile/example_config.yaml and its Snakefile is in example_profile/Snakefile . The config file is already configured to process the sensor data as explained in Analysis workflow modules .","title":"Analysis workflow structure"},{"location":"workflow-examples/analysis/#description-of-the-study-modeled-in-our-analysis-workflow-example","text":"Our example is based on a hypothetical study that recruited 2 participants that underwent surgery and collected mobile data for at least one week before and one week after the procedure. Participants wore a Fitbit device and installed the AWARE client in their personal Android and iOS smartphones to collect mobile data 24/7. In addition, participants completed daily severity ratings of 12 common symptoms on a scale from 0 to 10 that we summed up into a daily symptom burden score. The goal of this workflow is to find out if we can predict the daily symptom burden score of a participant. Thus, we framed this question as a binary classification problem with two classes, high and low symptom burden based on the scores above and below average of each participant. We also want to compare the performance of individual (personalized) models vs a population model. In total, our example workflow has nine steps that are in charge of sensor data preprocessing, feature extraction, feature cleaning, machine learning model training and model evaluation (see figure below). We ship this workflow with RAPIDS and share files with test data in an Open Science Framework repository. 
Modules of RAPIDS example workflow, from raw data to model evaluation","title":"Description of the study modeled in our analysis workflow example"},{"location":"workflow-examples/analysis/#configure-and-run-the-analysis-workflow-example","text":"Install RAPIDS Unzip the CSV files inside rapids_example_csv.zip in data/external/example_workflow/*.csv . Create the participant files for this example by running: ./rapids -j1 create_example_participant_files Run the example pipeline with: ./rapids -j1 --profile example_profile Note you will see a lot of warning messages, you can ignore them since they happen because we ran ML algorithms with a small fake dataset.","title":"Configure and run the analysis workflow example"},{"location":"workflow-examples/analysis/#modules-of-our-analysis-workflow-example","text":"1. Feature extraction We extract daily behavioral features for data yield, received and sent messages, missed, incoming and outgoing calls, resample fused location data using Doryab provider, activity recognition, battery, Bluetooth, screen, light, applications foreground, conversations, Wi-Fi connected, Wi-Fi visible, Fitbit heart rate summary and intraday data, Fitbit sleep summary data, and Fitbit step summary and intraday data without excluding sleep periods with an active bout threshold of 10 steps. In total, we obtained 237 daily sensor features over 12 days per participant. 2. Extract demographic data. It is common to have demographic data in addition to mobile and target (ground truth) data. In this example we include participants\u2019 age, gender and the number of days they spent in hospital after their surgery as features in our model. We extract these three columns from the data/external/example_workflow/participant_info.csv file. As these three features remain the same within participants, they are used only on the population model. Refer to the demographic_features rule in rules/models.smk . 3. Create target labels. 
The two classes for our machine learning binary classification problem are high and low symptom burden. Target values are already stored in the data/external/example_workflow/participant_target.csv file. A new rule/script can be created if further manipulation is necessary. Refer to the parse_targets rule in rules/models.smk . 4. Feature merging. These daily features are stored on a CSV file per sensor, a CSV file per participant, and a CSV file including all features from all participants (in every case each column represents a feature and each row represents a day). Refer to the merge_sensor_features_for_individual_participants and merge_sensor_features_for_all_participants rules in rules/features.smk . 5. Data visualization. At this point the user can use the five plots RAPIDS provides (or implement new ones) to explore and understand the quality of the raw data and extracted features and decide what sensors, days, or participants to include and exclude. Refer to rules/reports.smk to find the rules that generate these plots. 6. Feature cleaning. In this stage we perform four steps to clean our sensor feature file. First, we discard days with a data yield hour ratio less than or equal to 0.75, i.e. we include days with at least 18 hours of data. Second, we drop columns (features) with more than 30% of missing rows. Third, we drop columns with zero variance. Fourth, we drop rows (days) with more than 30% of missing columns (features). In this cleaning stage several parameters are created and exposed in example_profile/example_config.yaml . After this step, we kept 161 features over 11 days for the individual model of p01, 101 features over 12 days for the individual model of p02 and 107 features over 20 days for the population model. Note that the difference in the number of features between p01 and p02 is mostly due to iOS restrictions that stop researchers from collecting the same number of sensors as in Android phones. 
Feature cleaning for the individual models is done in the clean_sensor_features_for_individual_participants rule and for the population model in the clean_sensor_features_for_all_participants rule in rules/models.smk . 7. Merge features and targets. In this step we merge the cleaned features and target labels for our individual models in the merge_features_and_targets_for_individual_model rule in rules/models.smk . Additionally, we merge the cleaned features, target labels, and demographic features of our two participants for the population model in the merge_features_and_targets_for_population_model rule in rules/models.smk . These two merged files are the input for our individual and population models. 8. Modelling. This stage has three phases: model building, training and evaluation. In the building phase we impute, normalize and oversample our dataset. Missing numeric values in each column are imputed with their mean and we impute missing categorical values with their mode. We normalize each numeric column with one of three strategies (min-max, z-score, and scikit-learn package\u2019s robust scaler) and we one-hot encode each categorical feature as a numerical array. We oversample our imbalanced dataset using SMOTE (Synthetic Minority Over-sampling Technique) or a Random Over sampler from scikit-learn. All these parameters are exposed in example_profile/example_config.yaml . In the training phase, we create eight models: logistic regression, k-nearest neighbors, support vector machine, decision tree, random forest, gradient boosting classifier, extreme gradient boosting classifier and a light gradient boosting machine. We cross-validate each model with an inner cycle to tune hyper-parameters based on the Macro F1 score and an outer cycle to predict the test set on a model with the best hyper-parameters. Both cross-validation cycles use a leave-one-out strategy. 
Parameters for each model like weights and learning rates are exposed in example_profile/example_config.yaml . Finally, in the evaluation phase we compute the accuracy, Macro F1, kappa, area under the curve and per class precision, recall and F1 score of all folds of the outer cross-validation cycle. Refer to the modelling_for_individual_participants rule for the individual modeling and to the modelling_for_all_participants rule for the population modeling, both in rules/models.smk . 9. Compute model baselines. We create three baselines to evaluate our classification models. First, a majority classifier that labels each test sample with the majority class of our training data. Second, a random weighted classifier that predicts each test observation sampling at random from a binomial distribution based on the ratio of our target labels. Third, a decision tree classifier based solely on the demographic features of each participant. As we do not have demographic features for individual model, this baseline is only available for population model. Our baseline metrics (e.g. accuracy, precision, etc.) are saved into a CSV file, ready to be compared to our modeling results. Refer to the baselines_for_individual_model rule for the individual model baselines and to the baselines_for_population_model rule for population model baselines, both in rules/models.smk .","title":"Modules of our analysis workflow example"},{"location":"workflow-examples/minimal/","text":"Minimal Working Example \u00b6 This is a quick guide for creating and running a simple pipeline to extract missing, outgoing, and incoming call features for 24 hr ( 00:00:00 to 23:59:59 ) and night ( 00:00:00 to 05:59:59 ) time segments of every day of data of one participant that was monitored on the US East coast with an Android smartphone. 
Install RAPIDS and make sure your conda environment is active (see Installation ) Download this CSV file and save it as data/external/aware_csv/calls.csv Make the changes listed below for the corresponding Configuration step (we provide an example of what the relevant sections in your config.yml will look like after you are done) Required configuration changes ( click to expand ) Supported data streams . Based on the docs, we decided to use the aware_csv data stream because we are processing aware data saved in a CSV file. We will use this label in a later step; there\u2019s no need to type it or save it anywhere yet. Create your participants file . Since we are processing data from a single participant, you only need to create a single participant file called p01.yaml in data/external/participant_files . This participant file only has a PHONE section because this hypothetical participant was only monitored with a smartphone. Note that for a real analysis, you can do this automatically with a CSV file Add p01 to [PIDS] in config.yaml Create a file in data/external/participant_files/p01.yaml with the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty Select what time segments you want to extract features on. Set [TIME_SEGMENTS][FILE] to data/external/timesegments_periodic.csv Create a file in data/external/timesegments_periodic.csv with the following content label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 Choose the timezone of your study . 
We will use the default time zone settings since this example is processing data collected on the US East Coast ( America/New_York ) TIMEZONE : TYPE : SINGLE SINGLE : TZCODE : America/New_York Modify your device data stream configuration Set [PHONE_DATA_STREAMS][USE] to aware_csv . We will use the default value for [PHONE_DATA_STREAMS][aware_csv][FOLDER] since we already stored the test calls CSV file there. Select what sensors and features you want to process. Set [PHONE_CALLS][CONTAINER] to calls.csv in the config.yaml file. Set [PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE] to True in the config.yaml file. Example of the config.yaml sections after the changes outlined above This will be your config.yaml after following the instructions above. Click on the numbered markers to know more. PIDS : [ p01 ] # (1) TIMEZONE : TYPE : SINGLE # (2) SINGLE : TZCODE : America/New_York # ... other irrelevant sections TIME_SEGMENTS : &time_segments TYPE : PERIODIC # (3) FILE : \"data/external/timesegments_periodic.csv\" # (4) INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE PHONE_DATA_STREAMS : USE : aware_csv # (5) aware_csv : FOLDER : data/external/aware_csv # (6) # ... other irrelevant sections ############## PHONE ########################################################### ################################################################################ # ... other irrelevant sections # Communication call features config, TYPES and FEATURES keys need to match PHONE_CALLS : CONTAINER : calls.csv # (7) PROVIDERS : RAPIDS : COMPUTE : True # (8) CALL_TYPES : ... We added p01 to PIDS after creating the participant file: data/external/participant_files/p01.yaml With the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty We use the default SINGLE time zone. 
We use the default PERIODIC time segment [TYPE] We created this time segments file with these lines: label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 We set [USE] to aware_csv to tell RAPIDS to process sensor data collected with the AWARE Framework stored in CSV files. We used the default [FOLDER] for aware_csv since we already stored our test calls.csv file there We changed [CONTAINER] to calls.csv to process our test call data. We flipped [COMPUTE] to True to extract call behavioral features using the RAPIDS feature provider. Run RAPIDS ./rapids -j1 The call features for daily and night time segments will be in data/processed/features/all_participants/all_sensor_features.csv","title":"Minimal Example"},{"location":"workflow-examples/minimal/#minimal-working-example","text":"This is a quick guide for creating and running a simple pipeline to extract missing, outgoing, and incoming call features for 24 hr ( 00:00:00 to 23:59:59 ) and night ( 00:00:00 to 05:59:59 ) time segments of every day of data of one participant that was monitored on the US East coast with an Android smartphone. Install RAPIDS and make sure your conda environment is active (see Installation ) Download this CSV file and save it as data/external/aware_csv/calls.csv Make the changes listed below for the corresponding Configuration step (we provide an example of what the relevant sections in your config.yml will look like after you are done) Required configuration changes ( click to expand ) Supported data streams . Based on the docs, we decided to use the aware_csv data stream because we are processing aware data saved in a CSV file. We will use this label in a later step; there\u2019s no need to type it or save it anywhere yet. Create your participants file . Since we are processing data from a single participant, you only need to create a single participant file called p01.yaml in data/external/participant_files . 
This participant file only has a PHONE section because this hypothetical participant was only monitored with a smartphone. Note that for a real analysis, you can do this automatically with a CSV file Add p01 to [PIDS] in config.yaml Create a file in data/external/participant_files/p01.yaml with the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty Select what time segments you want to extract features on. Set [TIME_SEGMENTS][FILE] to data/external/timesegments_periodic.csv Create a file in data/external/timesegments_periodic.csv with the following content label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 Choose the timezone of your study . We will use the default time zone settings since this example is processing data collected on the US East Coast ( America/New_York ) TIMEZONE : TYPE : SINGLE SINGLE : TZCODE : America/New_York Modify your device data stream configuration Set [PHONE_DATA_STREAMS][USE] to aware_csv . We will use the default value for [PHONE_DATA_STREAMS][aware_csv][FOLDER] since we already stored the test calls CSV file there. Select what sensors and features you want to process. Set [PHONE_CALLS][CONTAINER] to calls.csv in the config.yaml file. Set [PHONE_CALLS][PROVIDERS][RAPIDS][COMPUTE] to True in the config.yaml file. Example of the config.yaml sections after the changes outlined above This will be your config.yaml after following the instructions above. Click on the numbered markers to know more. PIDS : [ p01 ] # (1) TIMEZONE : TYPE : SINGLE # (2) SINGLE : TZCODE : America/New_York # ... 
other irrelevant sections TIME_SEGMENTS : &time_segments TYPE : PERIODIC # (3) FILE : \"data/external/timesegments_periodic.csv\" # (4) INCLUDE_PAST_PERIODIC_SEGMENTS : FALSE PHONE_DATA_STREAMS : USE : aware_csv # (5) aware_csv : FOLDER : data/external/aware_csv # (6) # ... other irrelevant sections ############## PHONE ########################################################### ################################################################################ # ... other irrelevant sections # Communication call features config, TYPES and FEATURES keys need to match PHONE_CALLS : CONTAINER : calls.csv # (7) PROVIDERS : RAPIDS : COMPUTE : True # (8) CALL_TYPES : ... We added p01 to PIDS after creating the participant file: data/external/participant_files/p01.yaml With the following content: PHONE : DEVICE_IDS : [ a748ee1a-1d0b-4ae9-9074-279a2b6ba524 ] # the participant's AWARE device id PLATFORMS : [ android ] # or ios LABEL : MyTestP01 # any string START_DATE : 2020-01-01 # this can also be empty END_DATE : 2021-01-01 # this can also be empty We use the default SINGLE time zone. We use the default PERIODIC time segment [TYPE] We created this time segments file with these lines: label,start_time,length,repeats_on,repeats_value daily,00:00:00,23H 59M 59S,every_day,0 night,00:00:00,5H 59M 59S,every_day,0 We set [USE] to aware_csv to tell RAPIDS to process sensor data collected with the AWARE Framework stored in CSV files. We used the default [FOLDER] for aware_csv since we already stored our test calls.csv file there We changed [CONTAINER] to calls.csv to process our test call data. We flipped [COMPUTE] to True to extract call behavioral features using the RAPIDS feature provider. Run RAPIDS ./rapids -j1 The call features for daily and night time segments will be in data/processed/features/all_participants/all_sensor_features.csv","title":"Minimal Working Example"}]}
\ No newline at end of file
diff --git a/dev/sitemap.xml.gz b/dev/sitemap.xml.gz
index d98d9936..9c14248d 100644
Binary files a/dev/sitemap.xml.gz and b/dev/sitemap.xml.gz differ
diff --git a/dev/stylesheets/extra.css b/dev/stylesheets/extra.css
index 91707625..331aa97e 100644
--- a/dev/stylesheets/extra.css
+++ b/dev/stylesheets/extra.css
@@ -29,5 +29,4 @@ div[data-md-component=announce]>div#announce-msg>a{
 
 .md-typeset table:not([class]) th {
     min-width: 0rem;
-}
-
+}
\ No newline at end of file

timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_health	battery_technology
00:08:10.415	per_ios	4	80	100	4170	23	2	Li-ion
00:17:38.602	per_ios	4	77	100	4157	23	2	Li-ion
03:20:30.415	per_ios	2	77	100	4170	23	2	Li-ion
03:30:35.875	per_ios	2	80	100	4157	23	2	Li-ion
local_segment	local_segment_label	local_segment_start_datetime	local_segment_end_datetime	phone_battery_rapids_countdischarge	phone_battery_rapids_sumdurationdischarge	phone_battery_rapids_avgconsumptionrate	phone_battery_rapids_maxconsumptionrate	phone_battery_rapids_countcharge	phone_battery_rapids_sumdurationcharge
00:00:00,00:29:59	thirtyminutes0000	2020-07-01 00:00:00	2020-07-01 00:29:59	1	21.8259833333333	0.137450851775292	0.137450851775292	0	0
00:03:00,03:29:59	thirtyminutes0006	2020-07-01 03:00:00	2020-07-01 03:29:59	0	0	0	0	1	9.49288333333333
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_health	battery_technology
17:59:41.434	per_ios	4	59	100	4094	23	2	Li-ion
18:04:14.321	per_ios	4	58	100	4157	23	2	Li-ion
18:07:24.456	per_ios	4	57	100	4157	23	2	Li-ion
20:03:03.415	per_ios	2	72	100	4170	23	2	Li-ion
20:05:12.434	per_ios	2	73	100	4094	23	2	Li-ion
20:07:24.678	per_ios	2	74	100	4157	23	2	Li-ion
20:10:34.875	per_ios	2	75	100	4157	23	2	Li-ion
21:30:04.415	per_ios	4	74	100	4170	23	2	Li-ion
21:32:14.434	per_ios	4	73	100	4094	23	2	Li-ion
21:35:23.678	per_ios	4	72	100	4157	23	2	Li-ion
21:37:47.875	per_ios	4	71	100	4157	23	2	Li-ion
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_adaptor	battery_health	battery_technology
20:10:34.875	fre_ios	2	75	100	4157	23	0	2	Li-ion
20:20:17.171	fre_ios	4	74	100	4170	23	0	2	Li-ion
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_adaptor	battery_health	battery_technology
11:59:28.434	per_ios	2	63	100	4094	23	0	2	Li-ion
12:04:37.678	per_ios	2	64	100	4157	23	0	2	Li-ion
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_adaptor	battery_health	battery_technology
05:59:49.434	per_ios	4	79	100	4094	23	0	2	Li-ion
06:02:19.321	per_ios	4	78	100	4157	23	0	2	Li-ion
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_health	battery_technology
2020-07-02 00:03:47.875	per_and	3	63	100	4157	23	2	Li-ion
2020-07-02 00:05:47.875	per_and	3	62	100	4157	23	2	Li-ion
2020-07-02 23:55:47.875	per_and	3	55	100	4157	23	2	Li-ion
2020-07-02 23:59:47.875	per_and	3	54	100	4157	23	2	Li-ion
2020-07-03 00:06:47.875	per_and	3	53	100	4157	23	2	Li-ion
2020-07-03 00:09:47.875	per_and	3	52	100	4157	23	2	Li-ion
2020-07-03 23:47:05.000	per_and	3	60	100	4157	23	2	Li-ion
2020-07-03 23:55:05.000	per_and	3	59	100	4157	23	2	Li-ion
2020-07-04 00:15:05.000	per_and	3	58	100	4157	23	2	Li-ion
2020-07-04 00:18:05.000	per_and	3	57	100	4157	23	2	Li-ion
2020-07-04 23:51:00.000	per_and	3	41	100	4157	23	2	Li-ion
2020-07-04 23:57:00.000	per_and	3	40	100	4157	23	2	Li-ion
2020-07-05 00:21:00.000	per_and	3	39	100	4157	23	2	Li-ion
2020-07-05 00:23:00.000	per_and	3	38	100	4157	23	2	Li-ion
local_segment	local_segment_label	local_segment_start_datetime	local_segment_end_datetime	phone_battery_rapids_countdischarge	phone_battery_rapids_sumdurationdischarge	phone_battery_rapids_avgconsumptionrate	phone_battery_rapids_maxconsumptionrate
threeday#2020-07-02 00:00:00,2020-07-04 23:59:59	threeday	2020-07-02 00:00:00	2020-07-04 23:59:59	4	149.7954	0.0710868450815781	0.111113168762384
threeday#2020-07-03 00:00:00,2020-07-05 23:59:59	threeday	2020-07-03 00:00:00	2020-07-05 23:59:59	3	162.7952	0.0492745931499224	0.0502547286558745
threeday#2020-07-04 00:00:00,2020-07-06 23:59:59	threeday	2020-07-04 00:00:00	2020-07-06 23:59:59	2	110.0815	0.0449915246814979	0.0483879032392475
threeday#2020-07-05 00:00:00,2020-07-07 23:59:59	threeday	2020-07-05 00:00:00	2020-07-07 23:59:59	1	52.9991166666667	0.0377364779979038	0.0377364779979038
label	event_timestamp	length	shift	shift_direction	device_id
survey1	1587661220000	10H	10H	-1	a748ee1a-1d0b-4ae9-9074-279a2b6ba524
survey2	1587661220000	10H	5H	-1	a748ee1a-1d0b-4ae9-9074-279a2b6ba524
survey3	1587661220000	10H	0H	1	a748ee1a-1d0b-4ae9-9074-279a2b6ba524
timestamp	device_id	battery_status	battery_level	battery_scale	battery_voltage	battery_temperature	battery_health	battery_technology
2020-04-23 03:15:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	90	100	4157	23	2	Li-ion
2020-04-23 03:21:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	89	100	4157	23	2	Li-ion
2020-04-23 07:50:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	80	100	4157	23	2	Li-ion
2020-04-23 08:05:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	79	100	4157	23	2	Li-ion
2020-04-23 08:12:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	78	100	4157	23	2	Li-ion
2020-04-23 22:50:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	50	100	4157	23	2	Li-ion
2020-04-23 22:53:00.000	a748ee1a-1d0b-4ae9-9074-279a2b6ba524	3	49	100	4157	23	2	Li-ion