From d529490999567264820a335220fe72b405670c79 Mon Sep 17 00:00:00 2001
From: Meng Li <34143965+Meng6@users.noreply.github.com>
Date: Fri, 12 Mar 2021 12:38:36 -0500
Subject: [PATCH] Migrate fitbit features to new data stream

---
 Snakefile                                     |  5 +-
 docs/features/fitbit-data-yield.md            |  3 +-
 docs/features/fitbit-heartrate-intraday.md    | 28 +---------
 docs/features/fitbit-heartrate-summary.md     | 28 +---------
 docs/features/fitbit-sleep-intraday.md        | 53 ++-----------------
 docs/features/fitbit-sleep-summary.md         | 50 +----------------
 docs/features/fitbit-steps-intraday.md        | 28 +---------
 docs/features/fitbit-steps-summary.md         | 28 +---------
 rules/features.smk                            | 12 ++---
 src/data/datetime/readable_datetime.R         |  2 +-
 src/data/streams/pull_phone_data.R            |  2 +-
 src/data/streams/pull_wearable_data.R         |  2 +-
 .../episodes/sleep_intraday_episodes.py       | 37 +++++++++++++
 13 files changed, 63 insertions(+), 215 deletions(-)
 create mode 100644 src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py

diff --git a/Snakefile b/Snakefile
index e076c0af..862d14f7 100644
--- a/Snakefile
+++ b/Snakefile
@@ -217,8 +217,7 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
 for provider in config["FITBIT_DATA_YIELD"]["PROVIDERS"].keys():
     if config["FITBIT_DATA_YIELD"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_raw.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv", pid=config["PIDS"]))
-        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/fitbit_data_yield.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
         files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
@@ -253,6 +252,8 @@ for provider in config["FITBIT_SLEEP_SUMMARY"]["PROVIDERS"].keys():
 for provider in config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"].keys():
     if config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["COMPUTE"]:
         files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_raw.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv", pid=config["PIDS"]))
+        files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv", pid=config["PIDS"]))
         files_to_compute.extend(expand("data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv", pid=config["PIDS"], language=config["FITBIT_SLEEP_INTRADAY"]["PROVIDERS"][provider]["SRC_LANGUAGE"].lower(), provider_key=provider.lower()))
diff --git a/docs/features/fitbit-data-yield.md b/docs/features/fitbit-data-yield.md
index 0caa6793..0f0751d1 100644
--- a/docs/features/fitbit-data-yield.md
+++ b/docs/features/fitbit-data-yield.md
@@ -23,8 +23,7 @@ Before explaining the data yield features, let's define the following relevant c
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_heartrate_intraday_raw.csv
-    - data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv
-    - data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv
     - data/interim/{pid}/fitbit_data_yield_features/fitbit_data_yield_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_data_yield.csv
     ```
diff --git a/docs/features/fitbit-heartrate-intraday.md b/docs/features/fitbit-heartrate-intraday.md
index d1bd4196..6338d863 100644
--- a/docs/features/fitbit-heartrate-intraday.md
+++ b/docs/features/fitbit-heartrate-intraday.md
@@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_HEARTRATE_INTRADAY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the heart rate intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
-    
-    === "PLAIN_TEXT"
-        All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty.
-
-        |device_id                              |local_date_time        |heartrate |heartrate_zone  |
-        |-------------------------------------- |---------------------- |--------- |--------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:00:00    |68        |outofrange      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:01:00    |67        |outofrange      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:02:00    |67        |outofrange      |
+|`[CONTAINER]`| Container where your heart rate intraday data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 
 ## RAPIDS provider
@@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_heartrate_intraday_raw.csv
-    - data/raw/{pid}/fitbit_heartrate_intraday_parsed.csv
-    - data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv
     - data/interim/{pid}/fitbit_heartrate_intraday_features/fitbit_heartrate_intraday_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_heartrate_intraday.csv
     ```
diff --git a/docs/features/fitbit-heartrate-summary.md b/docs/features/fitbit-heartrate-summary.md
index 3a0f341b..ebb7aa41 100644
--- a/docs/features/fitbit-heartrate-summary.md
+++ b/docs/features/fitbit-heartrate-summary.md
@@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_HEARTRATE_SUMMARY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the heart rate summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-07","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1200.6102,"max":88,"min":31,"minutes":1058,"name":"Out of Range"},{"caloriesOut":760.3020,"max":120,"min":86,"minutes":366,"name":"Fat Burn"},{"caloriesOut":15.2048,"max":146,"min":120,"minutes":2,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":72}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":68},{"time":"00:01:00","value":67},{"time":"00:02:00","value":67},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-08","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":1100.1120,"max":89,"min":30,"minutes":921,"name":"Out of Range"},{"caloriesOut":660.0012,"max":118,"min":82,"minutes":361,"name":"Fat Burn"},{"caloriesOut":23.7088,"max":142,"min":108,"minutes":3,"name":"Cardio"},{"caloriesOut":0,"max":221,"min":148,"minutes":0,"name":"Peak"}],"restingHeartRate":70}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":77},{"time":"00:01:00","value":75},{"time":"00:02:00","value":73},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"activities-heart":[{"dateTime":"2020-10-09","value":{"customHeartRateZones":[],"heartRateZones":[{"caloriesOut":750.3615,"max":77,"min":30,"minutes":851,"name":"Out of Range"},{"caloriesOut":734.1516,"max":107,"min":77,"minutes":550,"name":"Fat Burn"},{"caloriesOut":131.8579,"max":130,"min":107,"minutes":29,"name":"Cardio"},{"caloriesOut":0,"max":220,"min":130,"minutes":0,"name":"Peak"}],"restingHeartRate":69}}],"activities-heart-intraday":{"dataset":[{"time":"00:00:00","value":90},{"time":"00:01:00","value":89},{"time":"00:02:00","value":88},...],"datasetInterval":1,"datasetType":"minute"}}
-    
-    === "PLAIN_TEXT"
-        All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty.
-
-        |device_id                              |local_date_time   |heartrate_daily_restinghr |heartrate_daily_caloriesoutofrange  |heartrate_daily_caloriesfatburn  |heartrate_daily_caloriescardio  |heartrate_daily_caloriespeak   |
-        |-------------------------------------- |----------------- |------- |-------------- |------------- |------------ |-------|
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07        |72      |1200.6102      |760.3020      |15.2048      |0      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-08        |70      |1100.1120      |660.0012      |23.7088      |0      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-09        |69      |750.3615       |734.1516      |131.8579     |0      |
+|`[CONTAINER]`| Container where your heart rate summary data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 
 ## RAPIDS provider
@@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_heartrate_summary_raw.csv
-    - data/raw/{pid}/fitbit_heartrate_summary_parsed.csv
-    - data/raw/{pid}/fitbit_heartrate_summary_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_heartrate_summary_with_datetime.csv
     - data/interim/{pid}/fitbit_heartrate_summary_features/fitbit_heartrate_summary_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_heartrate_summary.csv
     ```
diff --git a/docs/features/fitbit-sleep-intraday.md b/docs/features/fitbit-sleep-intraday.md
index ec679850..9661eb2e 100644
--- a/docs/features/fitbit-sleep-intraday.md
+++ b/docs/features/fitbit-sleep-intraday.md
@@ -4,55 +4,7 @@ Sensor parameters description for `[FITBIT_SLEEP_INTRADAY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the sleep intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data with Fitbit’s sleep API Version 1"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 2, "awakeDuration": 3, "awakeningsCount": 10, "dateOfSleep": "2020-10-07", "duration": 8100000, "efficiency": 91, "endTime": "2020-10-07T18:10:00.000", "isMainSleep": true, "logId": 14147921940, "minuteData": [{"dateTime": "15:55:00", "value": "3"}, {"dateTime": "15:56:00", "value": "3"}, {"dateTime": "15:57:00", "value": "2"},...], "minutesAfterWakeup": 0, "minutesAsleep": 123, "minutesAwake": 12, "minutesToFallAsleep": 0, "restlessCount": 8, "restlessDuration": 9, "startTime": "2020-10-07T15:55:00.000", "timeInBed": 135}, {"awakeCount": 0, "awakeDuration": 0, "awakeningsCount": 1, "dateOfSleep": "2020-10-07", "duration": 3780000, "efficiency": 100, "endTime": "2020-10-07T10:52:30.000", "isMainSleep": false, "logId": 14144903977, "minuteData": [{"dateTime": "09:49:00", "value": "1"}, {"dateTime": "09:50:00", "value": "1"}, {"dateTime": "09:51:00", "value": "1"},...], "minutesAfterWakeup": 1, "minutesAsleep": 62, "minutesAwake": 0, "minutesToFallAsleep": 0, "restlessCount": 1, "restlessDuration": 1, "startTime": "2020-10-07T09:49:00.000", "timeInBed": 63}], "summary": {"totalMinutesAsleep": 185, "totalSleepRecords": 2, "totalTimeInBed": 198}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 3, "awakeDuration": 21, "awakeningsCount": 16, "dateOfSleep": "2020-10-08", "duration": 19260000, "efficiency": 89, "endTime": "2020-10-08T06:01:30.000", "isMainSleep": true, "logId": 14150613895, "minuteData": [{"dateTime": "00:40:00", "value": "3"}, {"dateTime": "00:41:00", "value": "3"}, {"dateTime": "00:42:00", "value": "3"},...], "minutesAfterWakeup": 0, "minutesAsleep": 275, "minutesAwake": 33, "minutesToFallAsleep": 0, "restlessCount": 13, "restlessDuration": 25, "startTime": "2020-10-08T00:40:00.000", "timeInBed": 321}], "summary": {"totalMinutesAsleep": 275, "totalSleepRecords": 1, "totalTimeInBed": 321}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 1, "awakeDuration": 3, "awakeningsCount": 8, "dateOfSleep": "2020-10-09", "duration": 19320000, "efficiency": 96, "endTime": "2020-10-09T05:57:30.000", "isMainSleep": true, "logId": 14161136803, "minuteData": [{"dateTime": "00:35:30", "value": "2"}, {"dateTime": "00:36:30", "value": "1"}, {"dateTime": "00:37:30", "value": "1"},...], "minutesAfterWakeup": 0, "minutesAsleep": 309, "minutesAwake": 13, "minutesToFallAsleep": 0, "restlessCount": 7, "restlessDuration": 10, "startTime": "2020-10-09T00:35:30.000", "timeInBed": 322}], "summary": {"totalMinutesAsleep": 309, "totalSleepRecords": 1, "totalTimeInBed": 322}}
-    
-    === "PLAIN_TEXT"
-
-        All columns are mandatory, however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just have in mind that some features might be inaccurate or empty as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episodes extraction. `type_episode_id` is based on where it is extracted: if it is extracted from the 1st "minutesData" block, the `type_episode_id` field will be 0. Similarly, the kth block will be k-1. Actually, you only need to make sure rows extracted from the same "minutesData" block are assigned with the same unique `type_episode_id` value.
-
-        |device_id                             |type_episode_id  |local_date_time     |duration  |level      |is_main_sleep  |type           |
-        |------------------------------------  |---------------- |------------------- |--------- |---------- |-------------- |-------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524  |0                |2020-10-07 15:55:00 |60        |awake      |0              |classic        |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524  |0                |2020-10-07 15:56:00 |60        |awake      |0              |classic        |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524  |0                |2020-10-07 15:57:00 |60        |restless   |0              |classic        |
-
-??? example "Example of the structure of source data with Fitbit’s sleep API Version 1.2"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
-    
-    === "PLAIN_TEXT"
-
-        All columns are mandatory, however, all except `device_id`, `local_date_time` and `duration` can be empty if you don't have that data. Just have in mind that some features might be inaccurate or empty as `type_episode_id`, `level`, `is_main_sleep`, and `type` are used for sleep episodes extraction. `type_episode_id` is based on where it is extracted: if it is extracted from the 1st "data" and "shortData" block, the `type_episode_id` field will be 0. Similarly, the kth block will be k-1. Actually, you only need to make sure rows extracted from the same "minutesData" block are assigned with the same unique `type_episode_id` value.
-
-        |device_id                              |type_episode_id  |local_date_time     |duration  |level      |is_main_sleep  |type           |
-        |------------------------------------   |---------------- |------------------- |--------- |---------- |-------------- |-------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |0                |2020-10-10 15:36:30 |60        |restless   |0              |classic        |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |0                |2020-10-10 15:37:30 |660       |asleep     |0              |classic        |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |0                |2020-10-10 15:48:30 |60        |restless   |0              |classic        |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |...              |...                 |...       |...        |...            |...            |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |1                |2020-10-10 01:46:30 |420       |light      |1              |stages         |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |1                |2020-10-10 01:53:30 |1230      |deep       |1              |stages         |
+|`[CONTAINER]`| Container where your sleep intraday data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 ## RAPIDS provider
 
@@ -62,7 +14,8 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_sleep_intraday_raw.csv
-    - data/raw/{pid}/fitbit_sleep_intraday_parsed.csv
+    - data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv
+    - data/interim/{pid}/fitbit_sleep_intraday_episodes.csv
     - data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv
     - data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled_with_datetime.csv
     - data/interim/{pid}/fitbit_sleep_intraday_features/fitbit_sleep_intraday_{language}_{provider_key}.csv
diff --git a/docs/features/fitbit-sleep-summary.md b/docs/features/fitbit-sleep-summary.md
index 8e7f3834..515cb6d8 100644
--- a/docs/features/fitbit-sleep-summary.md
+++ b/docs/features/fitbit-sleep-summary.md
@@ -4,52 +4,7 @@ Sensor parameters description for `[FITBIT_SLEEP_SUMMARY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the sleep summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data with Fitbit’s sleep API Version 1"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 2, "awakeDuration": 3, "awakeningsCount": 10, "dateOfSleep": "2020-10-07", "duration": 8100000, "efficiency": 91, "endTime": "2020-10-07T18:10:00.000", "isMainSleep": true, "logId": 14147921940, "minuteData": [{"dateTime": "15:55:00", "value": "3"}, {"dateTime": "15:56:00", "value": "3"}, {"dateTime": "15:57:00", "value": "2"},...], "minutesAfterWakeup": 0, "minutesAsleep": 123, "minutesAwake": 12, "minutesToFallAsleep": 0, "restlessCount": 8, "restlessDuration": 9, "startTime": "2020-10-07T15:55:00.000", "timeInBed": 135}, {"awakeCount": 0, "awakeDuration": 0, "awakeningsCount": 1, "dateOfSleep": "2020-10-07", "duration": 3780000, "efficiency": 100, "endTime": "2020-10-07T10:52:30.000", "isMainSleep": false, "logId": 14144903977, "minuteData": [{"dateTime": "09:49:00", "value": "1"}, {"dateTime": "09:50:00", "value": "1"}, {"dateTime": "09:51:00", "value": "1"},...], "minutesAfterWakeup": 1, "minutesAsleep": 62, "minutesAwake": 0, "minutesToFallAsleep": 0, "restlessCount": 1, "restlessDuration": 1, "startTime": "2020-10-07T09:49:00.000", "timeInBed": 63}], "summary": {"totalMinutesAsleep": 185, "totalSleepRecords": 2, "totalTimeInBed": 198}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 3, "awakeDuration": 21, "awakeningsCount": 16, "dateOfSleep": "2020-10-08", "duration": 19260000, "efficiency": 89, "endTime": "2020-10-08T06:01:30.000", "isMainSleep": true, "logId": 14150613895, "minuteData": [{"dateTime": "00:40:00", "value": "3"}, {"dateTime": "00:41:00", "value": "3"}, {"dateTime": "00:42:00", "value": "3"},...], "minutesAfterWakeup": 0, "minutesAsleep": 275, "minutesAwake": 33, "minutesToFallAsleep": 0, "restlessCount": 13, "restlessDuration": 25, "startTime": "2020-10-08T00:40:00.000", "timeInBed": 321}], "summary": {"totalMinutesAsleep": 275, "totalSleepRecords": 1, "totalTimeInBed": 321}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep": [{"awakeCount": 1, "awakeDuration": 3, "awakeningsCount": 8, "dateOfSleep": "2020-10-09", "duration": 19320000, "efficiency": 96, "endTime": "2020-10-09T05:57:30.000", "isMainSleep": true, "logId": 14161136803, "minuteData": [{"dateTime": "00:35:30", "value": "2"}, {"dateTime": "00:36:30", "value": "1"}, {"dateTime": "00:37:30", "value": "1"},...], "minutesAfterWakeup": 0, "minutesAsleep": 309, "minutesAwake": 13, "minutesToFallAsleep": 0, "restlessCount": 7, "restlessDuration": 10, "startTime": "2020-10-09T00:35:30.000", "timeInBed": 322}], "summary": {"totalMinutesAsleep": 309, "totalSleepRecords": 1, "totalTimeInBed": 322}}
-    
-    === "PLAIN_TEXT"
-
-        All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty.
-        
-        |device_id                              |local_start_date_time  |local_end_date_time    |efficiency  |minutes_after_wakeup  |minutes_asleep  |minutes_awake  |minutes_to_fall_asleep  |minutes_in_bed  |is_main_sleep  |type     |count_awake |duration_awake  |count_awakenings  |count_restless  |duration_restless  |
-        |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- |----------- |--------------- |----------------- |--------------- |------------------ |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 15:55:00    |2020-10-07 18:10:00    |91          |0                     |123             |12             |0                       |135             |1              |classic  |2           |3               |10                |8               |9                  |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 09:49:00    |2020-10-07 10:52:30    |100         |1                     |62              |0              |0                       |63              |0              |classic  |0           |0               |1                 |1               |1                  |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-08 00:40:00    |2020-10-08 06:01:30    |89          |0                     |275             |33             |0                       |321             |1              |classic  |3           |21              |16                |13              |25                 |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-09 00:35:30    |2020-10-09 05:57:30    |96          |0                     |309             |13             |0                       |322             |1              |classic  |1           |3               |8                 |7               |10                 |
-
-??? example "Example of the structure of source data with Fitbit’s sleep API Version 1.2"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
-    
-    === "PLAIN_TEXT"
-        All columns are mandatory, however, all except `device_id` and `local_date_time` can be empty if you don't have that data. Just have in mind that some features will be empty if some of these columns are empty.
-
-        |device_id                              |local_start_date_time  |local_end_date_time    |efficiency  |minutes_after_wakeup  |minutes_asleep  |minutes_awake  |minutes_to_fall_asleep  |minutes_in_bed  |is_main_sleep  |type     |
-        |-------------------------------------- |---------------------- |---------------------- |----------- |--------------------- |--------------- |-------------- |----------------------- |--------------- |-------------- |-------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-10 15:36:30    |2020-10-10 16:37:00    |92          |0                     |55              |5              |0                       |60              |0              |classic  |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-10 01:46:30    |2020-10-10 08:10:00    |88          |0                     |318             |65             |0                       |383             |1              |stages   |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-11 00:12:30    |2020-10-11 11:47:00    |89          |1                     |562             |132            |0                       |694             |1              |stages   |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-12 01:31:00    |2020-10-12 09:34:30    |93          |0                     |415             |68             |0                       |483             |1              |stages   |
+|`[CONTAINER]`| Container where your sleep summary data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 
 ## RAPIDS provider
@@ -60,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_sleep_summary_raw.csv
-    - data/raw/{pid}/fitbit_sleep_summary_parsed.csv
-    - data/raw/{pid}/fitbit_sleep_summary_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_sleep_summary_with_datetime.csv
     - data/interim/{pid}/fitbit_sleep_summary_features/fitbit_sleep_summary_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_sleep_summary.csv
     ```
diff --git a/docs/features/fitbit-steps-intraday.md b/docs/features/fitbit-steps-intraday.md
index 63f22dbe..4b9ab6f9 100644
--- a/docs/features/fitbit-steps-intraday.md
+++ b/docs/features/fitbit-steps-intraday.md
@@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_STEPS_INTRADAY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the steps intraday data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-07","value":"1775"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":5},{"time":"00:01:00","value":3},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}}
-    
-    === "PLAIN_TEXT"
-        All columns are mandatory.
-
-        |device_id                              |local_date_time        |steps     |
-        |-------------------------------------- |---------------------- |--------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:00:00    |5         |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:01:00    |3         |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07 00:02:00    |0         |
+|`[CONTAINER]`| Container where your steps intraday data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 
 ## RAPIDS provider
@@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_steps_intraday_raw.csv
-    - data/raw/{pid}/fitbit_steps_intraday_parsed.csv
-    - data/raw/{pid}/fitbit_steps_intraday_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_steps_intraday_with_datetime.csv
     - data/interim/{pid}/fitbit_steps_intraday_features/fitbit_steps_intraday_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_steps_intraday.csv
     ```
diff --git a/docs/features/fitbit-steps-summary.md b/docs/features/fitbit-steps-summary.md
index eca48336..452b9405 100644
--- a/docs/features/fitbit-steps-summary.md
+++ b/docs/features/fitbit-steps-summary.md
@@ -4,30 +4,7 @@ Sensor parameters description for `[FITBIT_STEPS_SUMMARY]`:
 
 |Key&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;            | Description |
 |----------------|-----------------------------------------------------------------------------------------------------------------------------------
-|`[TABLE]`| Database table name or file path where the steps summary data is stored. The configuration keys in [Device Data Source Configuration](../../setup/configuration/#device-data-source-configuration) control whether this parameter is interpreted as table or file.
-
-The format of the column(s) containing the Fitbit sensor data can be `JSON` or `PLAIN_TEXT`. The data in `JSON` format is obtained directly from the Fitbit API. We support `PLAIN_TEXT` in case you already parsed your data and don't have access to your participants' Fitbit accounts anymore. If your data is in `JSON` format then summary and intraday data come packed together. 
-
-We provide examples of the input format that RAPIDS expects, note that both examples for `JSON` and `PLAIN_TEXT` are tabular and the actual format difference comes in the `fitbit_data` column (we truncate the `JSON` example for brevity).
-
-??? example "Example of the structure of source data"
-
-    === "JSON"
-
-        |device_id                                |fitbit_data                                               |
-        |---------------------------------------- |--------------------------------------------------------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-07","value":"1775"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":5},{"time":"00:01:00","value":3},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-08","value":"3201"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":14},{"time":"00:01:00","value":11},{"time":"00:02:00","value":10},...],"datasetInterval":1,"datasetType":"minute"}}
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524     |"activities-steps":[{"dateTime":"2020-10-09","value":"998"}],"activities-steps-intraday":{"dataset":[{"time":"00:00:00","value":0},{"time":"00:01:00","value":0},{"time":"00:02:00","value":0},...],"datasetInterval":1,"datasetType":"minute"}}
-    
-    === "PLAIN_TEXT"
-        All columns are mandatory.
-
-        |device_id                              |local_date_time        |steps     |
-        |-------------------------------------- |---------------------- |--------- |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-07             |1775      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-08             |3201      |
-        |a748ee1a-1d0b-4ae9-9074-279a2b6ba524   |2020-10-09             |998       |
+|`[CONTAINER]`| Container where your steps summary data is stored. Depending on the data stream you are using, this can be a database table, a CSV file, etc. |
 
 
 ## RAPIDS provider
@@ -38,8 +15,7 @@ We provide examples of the input format that RAPIDS expects, note that both exam
 !!! info "File Sequence"
     ```bash
     - data/raw/{pid}/fitbit_steps_summary_raw.csv
-    - data/raw/{pid}/fitbit_steps_summary_parsed.csv
-    - data/raw/{pid}/fitbit_steps_summary_parsed_with_datetime.csv
+    - data/raw/{pid}/fitbit_steps_summary_with_datetime.csv
     - data/interim/{pid}/fitbit_steps_summary_features/fitbit_steps_summary_{language}_{provider_key}.csv
     - data/processed/features/{pid}/fitbit_steps_summary.csv
     ```
diff --git a/rules/features.smk b/rules/features.smk
index c13a4cd9..a2105145 100644
--- a/rules/features.smk
+++ b/rules/features.smk
@@ -506,7 +506,7 @@ rule phone_wifi_visible_r_features:
 
 rule fitbit_data_yield_python_features:
     input:
-        sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
+        sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
         time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
     params:
         provider = lambda wildcards: config["FITBIT_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
@@ -519,7 +519,7 @@ rule fitbit_data_yield_python_features:
 
 rule fitbit_data_yield_r_features:
     input:
-        sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_parsed_with_datetime.csv",
+        sensor_data = "data/raw/{pid}/fitbit_heartrate_intraday_with_datetime.csv",
         time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
     params:
         provider = lambda wildcards: config["FITBIT_DATA_YIELD"]["PROVIDERS"][wildcards.provider_key.upper()],
@@ -660,13 +660,13 @@ rule fitbit_sleep_summary_r_features:
     script:
         "../src/features/entry.R"
 
-rule resample_sleep_episodes:
+rule sleep_intraday_episodes:
     input:
-        "data/raw/{pid}/fitbit_sleep_intraday_parsed.csv"
+        sleep_intraday = "data/raw/{pid}/fitbit_sleep_intraday_with_datetime.csv"
     output:
-        "data/interim/{pid}/fitbit_sleep_intraday_episodes_resampled.csv"
+        "data/interim/{pid}/fitbit_sleep_intraday_episodes.csv"
     script:
-        "../src/features/utils/resample_episodes.R"
+        "../src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py"
 
 rule fitbit_sleep_intraday_python_features:
     input:
diff --git a/src/data/datetime/readable_datetime.R b/src/data/datetime/readable_datetime.R
index 117c94e6..86e57b95 100644
--- a/src/data/datetime/readable_datetime.R
+++ b/src/data/datetime/readable_datetime.R
@@ -49,7 +49,7 @@ validate_user_timezones <- function(timezone_parameters){
 }
 
 create_mising_temporal_column <- function(data, device_type){
-  if(device_type == "fitbit"){
+  if(device_type == "fitbit" && all(data$timestamp == 0)){
       # For fibit we infere timestamp from Fitbit's local date time
       if(nrow(data) == 0)
         return(data %>% mutate(timestamp = NA_real_))
diff --git a/src/data/streams/pull_phone_data.R b/src/data/streams/pull_phone_data.R
index b0ba2244..f94f6a0b 100644
--- a/src/data/streams/pull_phone_data.R
+++ b/src/data/streams/pull_phone_data.R
@@ -160,7 +160,7 @@ pull_phone_data <- function(){
 
     if(!setequal(expected_columns, colnames(mutated_data)))
       stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
-    participant_data <- rbind(participant_data, mutated_data)
+    participant_data <- rbind(participant_data, mutated_data %>% distinct())
       
   }
   participant_data <- participant_data %>% arrange(timestamp)
diff --git a/src/data/streams/pull_wearable_data.R b/src/data/streams/pull_wearable_data.R
index 50184a3b..9f744231 100644
--- a/src/data/streams/pull_wearable_data.R
+++ b/src/data/streams/pull_wearable_data.R
@@ -111,7 +111,7 @@ pull_wearable_data_main <- function(){
 
     if(!setequal(expected_columns, colnames(mutated_data)))
       stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
-    participant_data <- rbind(participant_data, mutated_data)
+    participant_data <- rbind(participant_data, mutated_data %>% distinct())
       
   }
   if(device_type == "fitbit")
diff --git a/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py b/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py
new file mode 100644
index 00000000..cdca5c1d
--- /dev/null
+++ b/src/features/fitbit_sleep_intraday/episodes/sleep_intraday_episodes.py
@@ -0,0 +1,37 @@
+import pandas as pd
+import numpy as np
+
+
+def mergeSleepEpisodes(sleep_data, cols_for_groupby):  # collapse the rows of each cols_for_groupby group into a single episode row
+    sleep_episodes = pd.DataFrame(columns=["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type", "timestamp", "duration"])  # empty result, returned as-is when sleep_data has no rows
+    if not sleep_data.empty:
+        sleep_data = sleep_data.groupby(by=cols_for_groupby)
+        sleep_episodes = sleep_data[["timestamp"]].first()  # first (non-null) timestamp of each group; the group keys become the index
+        sleep_episodes["duration"] = sleep_data["duration"].sum()  # total duration of each group
+    
+    return sleep_episodes
+
+
+sleep_intraday = pd.read_csv(snakemake.input["sleep_intraday"])
+
+# Drop the device, local date-time and segment columns; they are not used when building episodes
+for col in ["device_id", "local_timezone", "local_date_time", "local_date", "local_time", "local_hour", "local_minute", "assigned_segments"]:
+    del sleep_intraday[col]
+
+# Extract "unified_level" based on "level" field
+# For "classic" type, "unified_level" is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
+# For "stages" type, "unified_level" is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
+sleep_intraday["unified_level"] = np.where(sleep_intraday["level"].isin(["awake", "restless", "wake"]), 0, 1)  # any level outside the awake set is mapped to 1
+
+# Start a new "level_episode_id" whenever "type_episode_id" or "level" differs from the previous row, then merge each run of rows into one episode
+sleep_intraday.insert(2, "level_episode_id", (sleep_intraday[["type_episode_id", "level"]] != sleep_intraday[["type_episode_id", "level"]].shift()).any(axis=1).cumsum())
+sleep_intraday_episodes = mergeSleepEpisodes(sleep_intraday, ["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type"])
+
+
+# Generate "start_timestamp" and "end_timestamp": end = start + duration * 1000 - 1 (presumably duration is in seconds and timestamp in milliseconds; TODO confirm)
+sleep_intraday_episodes["end_timestamp"] = sleep_intraday_episodes["timestamp"] + ((sleep_intraday_episodes["duration"] - 1) * 1000) + 999
+sleep_intraday_episodes.rename(columns={"timestamp": "start_timestamp"}, inplace=True)
+
+del sleep_intraday_episodes["duration"]
+
+sleep_intraday_episodes.to_csv(snakemake.output[0], index=True)  # index=True writes the group-by keys, which form the index when the input is non-empty