Add sleep intraday to fitbitjson_mysql

pull/128/head
JulioV 2021-03-10 19:38:09 -05:00
parent a420f5ef92
commit 1b0ee4bbf0
10 changed files with 236 additions and 283 deletions

View File

@ -279,13 +279,6 @@ for provider in config["FITBIT_STEPS_INTRADAY"]["PROVIDERS"].keys():
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
# for provider in config["FITBIT_CALORIES"]["PROVIDERS"].keys():
# if config["FITBIT_CALORIES"]["PROVIDERS"][provider]["COMPUTE"]:
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_raw.csv", pid=config["PIDS"], fitbit_data_type=(["json"] if config["FITBIT_CALORIES"]["TABLE_FORMAT"] == "JSON" else ["summary", "intraday"])))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/raw/{pid}/fitbit_calories_{fitbit_data_type}_parsed_with_datetime.csv", pid=config["PIDS"], fitbit_data_type=["summary", "intraday"]))
# files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))
# files_to_compute.append("data/processed/features/all_participants/all_sensor_features.csv")
for provider in config["EMPATICA_ACCELEROMETER"]["PROVIDERS"].keys():
if config["EMPATICA_ACCELEROMETER"]["PROVIDERS"][provider]["COMPUTE"]:

View File

@ -448,19 +448,6 @@ FITBIT_STEPS_INTRADAY:
SRC_FOLDER: "rapids" # inside src/features/fitbit_steps_intraday
SRC_LANGUAGE: "python"
# FITBIT_CALORIES:
# TABLE_FORMAT: JSON # JSON or CSV. If your JSON or CSV data are files change [DEVICE_DATA][FITBIT][SOURCE][TYPE] to FILES
# TABLE:
# JSON: fitbit_calories
# CSV:
# SUMMARY: calories_summary
# INTRADAY: calories_intraday
# PROVIDERS:
# RAPIDS:
# COMPUTE: False
# FEATURES: []
########################################################################################################################
# EMPATICA #
########################################################################################################################

View File

@ -146,6 +146,49 @@ If you want RAPIDS to process Fitbit sensor data using this stream, you will nee
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
??? info "FITBIT_SLEEP_INTRADAY"
**RAPIDS_COLUMN_MAPPINGS**
| RAPIDS column | Stream column |
|-----------------|-----------------|
| TIMESTAMP | FLAG_TO_MUTATE |
| LOCAL_DATE_TIME | FLAG_TO_MUTATE |
| DEVICE_ID | device_id |
| TYPE_EPISODE_ID | FLAG_TO_MUTATE |
| DURATION | FLAG_TO_MUTATE |
| IS_MAIN_SLEEP | FLAG_TO_MUTATE |
| TYPE | FLAG_TO_MUTATE |
| LEVEL | FLAG_TO_MUTATE |
**MUTATION**
- **COLUMN_MAPPINGS**
| Script column | Stream column |
|-----------------|-----------------|
| JSON_FITBIT_COLUMN | fitbit_data |
- **SCRIPTS**
```bash
src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py
```
!!! note
Fitbit API has two versions for sleep data, v1 and v1.2, we support both.
All columns except `DEVICE_ID` are parsed from `JSON_FITBIT_COLUMN`. `JSON_FITBIT_COLUMN` is a string column containing the JSON objects returned by Fitbit's API. See an example of the raw data RAPIDS expects for this data stream:
??? example "Example of the expected raw data"
|device_id |fitbit_data |
|---------------------------------------- |--------------------------------------------------------- |
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-10","duration":3600000,"efficiency":92,"endTime":"2020-10-10T16:37:00.000","infoCode":2,"isMainSleep":false,"levels":{"data":[{"dateTime":"2020-10-10T15:36:30.000","level":"restless","seconds":60},{"dateTime":"2020-10-10T15:37:30.000","level":"asleep","seconds":660},{"dateTime":"2020-10-10T15:48:30.000","level":"restless","seconds":60},...], "summary":{"asleep":{"count":0,"minutes":56},"awake":{"count":0,"minutes":0},"restless":{"count":3,"minutes":4}}},"logId":26315914306,"minutesAfterWakeup":0,"minutesAsleep":55,"minutesAwake":5,"minutesToFallAsleep":0,"startTime":"2020-10-10T15:36:30.000","timeInBed":60,"type":"classic"},{"dateOfSleep":"2020-10-10","duration":22980000,"efficiency":88,"endTime":"2020-10-10T08:10:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-10T01:46:30.000","level":"light","seconds":420},{"dateTime":"2020-10-10T01:53:30.000","level":"deep","seconds":1230},{"dateTime":"2020-10-10T02:14:00.000","level":"light","seconds":360},...], "summary":{"deep":{"count":3,"minutes":92,"thirtyDayAvgMinutes":0},"light":{"count":29,"minutes":193,"thirtyDayAvgMinutes":0},"rem":{"count":4,"minutes":33,"thirtyDayAvgMinutes":0},"wake":{"count":28,"minutes":65,"thirtyDayAvgMinutes":0}}},"logId":26311786557,"minutesAfterWakeup":0,"minutesAsleep":318,"minutesAwake":65,"minutesToFallAsleep":0,"startTime":"2020-10-10T01:46:30.000","timeInBed":383,"type":"stages"}],"summary":{"stages":{"deep":92,"light":193,"rem":33,"wake":65},"totalMinutesAsleep":373,"totalSleepRecords":2,"totalTimeInBed":443}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-11","duration":41640000,"efficiency":89,"endTime":"2020-10-11T11:47:00.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-11T00:12:30.000","level":"wake","seconds":450},{"dateTime":"2020-10-11T00:20:00.000","level":"light","seconds":870},{"dateTime":"2020-10-11T00:34:30.000","level":"wake","seconds":780},...], "summary":{"deep":{"count":4,"minutes":52,"thirtyDayAvgMinutes":62},"light":{"count":32,"minutes":442,"thirtyDayAvgMinutes":364},"rem":{"count":6,"minutes":68,"thirtyDayAvgMinutes":58},"wake":{"count":29,"minutes":132,"thirtyDayAvgMinutes":94}}},"logId":26589710670,"minutesAfterWakeup":1,"minutesAsleep":562,"minutesAwake":132,"minutesToFallAsleep":0,"startTime":"2020-10-11T00:12:30.000","timeInBed":694,"type":"stages"}],"summary":{"stages":{"deep":52,"light":442,"rem":68,"wake":132},"totalMinutesAsleep":562,"totalSleepRecords":1,"totalTimeInBed":694}}
|a748ee1a-1d0b-4ae9-9074-279a2b6ba524 |{"sleep":[{"dateOfSleep":"2020-10-12","duration":28980000,"efficiency":93,"endTime":"2020-10-12T09:34:30.000","infoCode":0,"isMainSleep":true,"levels":{"data":[{"dateTime":"2020-10-12T01:31:00.000","level":"wake","seconds":600},{"dateTime":"2020-10-12T01:41:00.000","level":"light","seconds":60},{"dateTime":"2020-10-12T01:42:00.000","level":"deep","seconds":2340},...], "summary":{"deep":{"count":4,"minutes":63,"thirtyDayAvgMinutes":59},"light":{"count":27,"minutes":257,"thirtyDayAvgMinutes":364},"rem":{"count":5,"minutes":94,"thirtyDayAvgMinutes":58},"wake":{"count":24,"minutes":69,"thirtyDayAvgMinutes":95}}},"logId":26589710673,"minutesAfterWakeup":0,"minutesAsleep":415,"minutesAwake":68,"minutesToFallAsleep":0,"startTime":"2020-10-12T01:31:00.000","timeInBed":483,"type":"stages"}],"summary":{"stages":{"deep":63,"light":257,"rem":94,"wake":69},"totalMinutesAsleep":415,"totalSleepRecords":1,"totalTimeInBed":483}}
??? info "FITBIT_STEPS_SUMMARY"
**RAPIDS_COLUMN_MAPPINGS**

View File

@ -6,7 +6,7 @@ This is a description of the format RAPIDS needs to process data for the followi
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| HEARTRATE_DAILY_RESTINGHR | Daily resting heartrate |
@ -19,7 +19,7 @@ This is a description of the format RAPIDS needs to process data for the followi
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| HEARTRATE | Intraday heartrate |
@ -29,7 +29,7 @@ This is a description of the format RAPIDS needs to process data for the followi
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`, this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day|
| LOCAL_START_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the start of a daily sleep episode |
| LOCAL_END_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` representing the end of a daily sleep episode|
@ -43,11 +43,24 @@ This is a description of the format RAPIDS needs to process data for the followi
| IS_MAIN_SLEEP | 0 if this episode is a nap, or 1 if it is a main sleep episode|
| TYPE | stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)|
??? info "FITBIT_SLEEP_INTRADAY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS)|
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss`, this either is a copy of LOCAL_START_DATE_TIME or LOCAL_END_DATE_TIME depending on which column is used to assign an episode to a specific day|
| DEVICE_ID | A string that uniquely identifies a device |
| TYPE_EPISODE_ID | An id for each unique main or nap episode. Main and nap episodes have different levels, each row in this table is one of such levels, so multiple rows can have the same TYPE_EPISODE_ID|
| DURATION | Duration of the episode level in minutes|
| IS_MAIN_SLEEP | 0 if this episode level belongs to a nap, or 1 if it belongs to a main sleep episode|
| TYPE | type of level: stages or classic [sleep data](https://dev.fitbit.com/build/reference/web-api/sleep/)|
| LEVEL | For stages levels one of `wake`, `deep`, `light`, or `rem`. For classic levels one of `awake`, `restless`, and `asleep`|
??? info "FITBIT_STEPS_SUMMARY"
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| STEPS | Daily step count |
@ -56,7 +69,7 @@ This is a description of the format RAPIDS needs to process data for the followi
| RAPIDS column | Description |
|-----------------|-----------------|
| TIMESTAMP | An UNIX timestamp (13 digits) when a row of data was logged |
| TIMESTAMP | A UNIX timestamp (13 digits) when a row of data was logged (automatically created by RAPIDS) |
| LOCAL_DATE_TIME | Date time string with format `yyyy-mm-dd hh:mm:ss` |
| DEVICE_ID | A string that uniquely identifies a device |
| STEPS | Intraday step count (usually every minute)|

View File

@ -1,251 +0,0 @@
import json, yaml
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import dateutil.parser
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
SLEEP_SUMMARY_COLUMNS_V1_2 = ("device_id", "efficiency",
"minutes_after_wakeup", "minutes_asleep", "minutes_awake", "minutes_to_fall_asleep", "minutes_in_bed",
"is_main_sleep", "type",
"local_start_date_time", "local_end_date_time",
"timestamp")
SLEEP_SUMMARY_COLUMNS_V1 = SLEEP_SUMMARY_COLUMNS_V1_2 + ("count_awake", "duration_awake", "count_awakenings", "count_restless", "duration_restless")
SLEEP_INTRADAY_COLUMNS = (# Extract "type_episode_id" field based on summary data: start from 0
"type_episode_id",
"duration",
# For "classic" type, original_level is one of {"awake", "restless", "asleep"}
# For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
"level",
# For "classic" type, unified_level is one of {0, 1} where 0: awake {"awake" + "restless"}, 1: asleep {"asleep"}
# For "stages" type, unified_level is one of {0, 1} where 0: awake {"wake"}, 1: asleep {"deep" + "light" + "rem"}
"unified_level",
# One of {0, 1} where 0: nap, 1: main sleep
"is_main_sleep",
# One of {"classic", "stages"}
"type",
"local_date_time",
"start_timestamp",
"end_timestamp")
def mergeLongAndShortData(data_intraday):
    """Expand Fitbit "stages" long/short interval data into 30-second rows.

    data_intraday: dict with "data" (level intervals) and optionally
    "shortData" (brief wake intervals); each entry has "dateTime" (ISO-8601
    string), "level", and "seconds". Returns a list of [datetime, level]
    pairs in 30-second steps; any step also covered by shortData is
    relabelled "wake".

    Fixes: builds plain lists instead of the deprecated (removed in
    pandas >= 2.0) DataFrame.append-in-a-loop pattern; tolerates records
    without a "shortData" key; uses stdlib datetime.fromisoformat instead of
    dateutil for Fitbit's "YYYY-MM-DDTHH:MM:SS.mmm" strings.
    """
    window_length = 30  # seconds per expanded row

    def expand(intervals):
        # One (datetime, level) pair per 30-second window of each interval.
        rows = []
        for interval in intervals:
            start = datetime.fromisoformat(interval["dateTime"])
            for i in range(interval["seconds"] // window_length):
                rows.append((start + timedelta(seconds=i * window_length), interval["level"]))
        return rows

    long_rows = expand(data_intraday["data"])
    # "shortData" may be absent in some records; treat it as empty then.
    short_times = {dt for dt, _ in expand(data_intraday.get("shortData", []))}
    # Windows that overlap a short (wake) interval are overridden to "wake".
    return [[dt, "wake" if dt in short_times else level] for dt, level in long_rows]
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
    """Parse one sleep API v1 record into summary and/or per-minute intraday rows.

    Appends tuples shaped like SLEEP_SUMMARY_COLUMNS_V1 to records_summary
    (when fitbit_data_type == "summary") and tuples shaped like
    SLEEP_INTRADAY_COLUMNS to records_intraday (when "intraday"), then
    returns both lists.

    Fix: the start/end strings were sliced with [:18], which cut off the last
    digit of the seconds field ("...T23:58:30" became "...T23:58:3"); [:19]
    keeps the full "YYYY-MM-DDTHH:MM:SS" prefix.
    """
    sleep_record_type = "classic"
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")
    # Summary data
    if fitbit_data_type == "summary":
        row_summary = (device_id, record["efficiency"],
                       record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
                       d_is_main_sleep, sleep_record_type,
                       d_start_datetime, d_end_datetime,
                       0,
                       record["awakeCount"], record["awakeDuration"], record["awakeningsCount"],
                       record["restlessCount"], record["restlessDuration"])
        records_summary.append(row_summary)
    # Intraday data: v1 only carries "HH:MM:SS" per minute, so reconstruct the
    # date from the episode's start/end dates.
    if fitbit_data_type == "intraday":
        start_date = d_start_datetime.date()
        end_date = d_end_datetime.date()
        is_before_midnight = True
        curr_date = start_date
        for data in record["minuteData"]:
            d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
            # For overnight episodes, switch to end_date once we cross midnight.
            if is_before_midnight and d_time.hour == 0:
                curr_date = end_date
                is_before_midnight = False  # switch only once
            d_datetime = datetime.combine(curr_date, d_time)
            # API 1.2 stores original_level as strings, so we convert the
            # numeric levels of API 1 too (1: "asleep", 2: "restless", 3: "awake").
            d_original_level = SLEEP_CODE2LEVEL[int(data["value"]) - 1]
            row_intraday = (type_episode_id, 60,
                            d_original_level, -1, d_is_main_sleep, sleep_record_type,
                            d_datetime, 0, 0)
            records_intraday.append(row_intraday)
    return records_summary, records_intraday
# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, type_episode_id, d_is_main_sleep, records_summary, records_intraday, fitbit_data_type):
    """Parse one sleep API v1.2 record ("classic" or "stages") into rows.

    Appends tuples shaped like SLEEP_SUMMARY_COLUMNS_V1_2 to records_summary
    (when fitbit_data_type == "summary") and tuples shaped like
    SLEEP_INTRADAY_COLUMNS to records_intraday (when "intraday"), then
    returns both lists.

    Fixes: [:19] keeps the full "YYYY-MM-DDTHH:MM:SS" prefix — the previous
    [:18] slice dropped the last digit of the seconds field and corrupted the
    summary start/end times; stdlib datetime.fromisoformat replaces dateutil
    for Fitbit's "YYYY-MM-DDTHH:MM:SS.mmm" strings.
    """
    sleep_record_type = record['type']
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")
    # Summary data
    if fitbit_data_type == "summary":
        row_summary = (device_id, record["efficiency"],
                       record["minutesAfterWakeup"], record["minutesAsleep"], record["minutesAwake"], record["minutesToFallAsleep"], record["timeInBed"],
                       d_is_main_sleep, sleep_record_type,
                       d_start_datetime, d_end_datetime,
                       0)
        records_summary.append(row_summary)
    # Intraday data
    if fitbit_data_type == "intraday":
        if sleep_record_type == "classic":
            # "classic" entries already carry their own variable durations.
            for data in record["levels"]["data"]:
                d_datetime = datetime.fromisoformat(data["dateTime"])
                row_intraday = (type_episode_id, data["seconds"],
                                data["level"], -1, d_is_main_sleep, sleep_record_type,
                                d_datetime, 0, 0)
                records_intraday.append(row_intraday)
        else:
            # Sleep type "stages": merge long and short interval streams into
            # fixed 30-second windows first.
            for data in mergeLongAndShortData(record["levels"]):
                row_intraday = (type_episode_id, 30,
                                data[1], -1, d_is_main_sleep, sleep_record_type,
                                data[0], 0, 0)
                records_intraday.append(row_intraday)
    return records_summary, records_intraday
def parseSleepData(sleep_data, fitbit_data_type):
    """Parse a dataframe of raw Fitbit sleep JSON into summary or intraday rows.

    sleep_data: dataframe with "device_id" and "fitbit_data" (JSON strings).
    fitbit_data_type: "summary" or "intraday", selecting the output shape.
    Summary columns depend on the API version of the last record parsed
    (v1 records add awake/restless count columns).
    """
    summary_columns = SLEEP_SUMMARY_COLUMNS_V1_2
    if sleep_data.empty:
        if fitbit_data_type == "summary":
            return pd.DataFrame(columns=summary_columns)
        elif fitbit_data_type == "intraday":
            return pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
    device_id = sleep_data["device_id"].iloc[0]
    records_summary, records_intraday = [], []
    # Each JSON payload can hold several sleep episodes; number them globally.
    episode_counter = 0
    for raw_json in sleep_data.fitbit_data:
        for record in json.loads(raw_json)["sleep"]:
            # Nap (0) vs main sleep (1).
            is_main = 1 if record["isMainSleep"] else 0
            if "awakeCount" in record:
                # Only sleep API v1 records carry "awakeCount".
                summary_columns = SLEEP_SUMMARY_COLUMNS_V1
                records_summary, records_intraday = parseOneRecordForV1(record, device_id, episode_counter, is_main, records_summary, records_intraday, fitbit_data_type)
            else:
                summary_columns = SLEEP_SUMMARY_COLUMNS_V1_2
                records_summary, records_intraday = parseOneRecordForV12(record, device_id, episode_counter, is_main, records_summary, records_intraday, fitbit_data_type)
            episode_counter += 1
    if fitbit_data_type == "summary":
        parsed_data = pd.DataFrame(data=records_summary, columns=summary_columns)
    elif fitbit_data_type == "intraday":
        parsed_data = pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
    return parsed_data
def mergeSleepEpisodes(sleep_data, cols_for_groupby):
    """Collapse consecutive same-level sleep rows into single episode rows.

    Rows sharing the values of cols_for_groupby become one row keeping the
    first start_timestamp and the last end_timestamp. An empty input yields
    an empty episode frame with the full episode column set.
    """
    empty_columns = ["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type", "start_timestamp", "end_timestamp"]
    if sleep_data.empty:
        return pd.DataFrame(columns=empty_columns)
    grouped = sleep_data.groupby(by=cols_for_groupby)
    episodes = grouped[["start_timestamp"]].first()
    episodes["end_timestamp"] = grouped["end_timestamp"].last()
    return episodes.reset_index(drop=False)
# ---------------------------------------------------------------------------
# Snakemake entry point: parse raw Fitbit sleep data (JSON or plain text) into
# a "summary" (one row per sleep episode) or "intraday" (one row per level
# episode) CSV, stamping rows with UNIX-millisecond timestamps localized to
# the study timezone.
# ---------------------------------------------------------------------------
timezone = snakemake.params["timezone"]
column_format = snakemake.params["column_format"]  # "JSON" or "PLAIN_TEXT"
fitbit_data_type = snakemake.params["fitbit_data_type"]  # "summary" or "intraday"
sleep_episode_timestamp = snakemake.params["sleep_episode_timestamp"]  # "start" or "end"

# Participant's Fitbit date range; END_DATE is made exclusive by adding a day.
with open(snakemake.input["participant_file"], "r", encoding="utf-8") as f:
    participant_file = yaml.safe_load(f)
local_start_date = pd.Timestamp(participant_file["FITBIT"]["START_DATE"])
local_end_date = pd.Timestamp(participant_file["FITBIT"]["END_DATE"]) + pd.DateOffset(1)

if column_format == "JSON":
    json_raw = pd.read_csv(snakemake.input["raw_data"])
    parsed_data = parseSleepData(json_raw, fitbit_data_type)
elif column_format == "PLAIN_TEXT":
    # Plain-text data is already parsed; load it with timezone-naive datetimes.
    if fitbit_data_type == "summary":
        parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_start_date_time", "local_end_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
    elif fitbit_data_type == "intraday":
        parsed_data = pd.read_csv(snakemake.input["raw_data"], parse_dates=["local_date_time"], date_parser=lambda col: pd.to_datetime(col).tz_localize(None))
else:
    raise ValueError("column_format can only be one of ['JSON', 'PLAIN_TEXT'].")

# Drop duplicates
parsed_data.drop_duplicates(inplace=True)

if parsed_data.shape[0] > 0 and fitbit_data_type == "summary":
    if sleep_episode_timestamp != "start" and sleep_episode_timestamp != "end":
        raise ValueError("SLEEP_EPISODE_TIMESTAMP can only be one of ['start', 'end'].")
    # Column name to be considered as the event datetime
    datetime_column = "local_" + sleep_episode_timestamp + "_date_time"
    # Keep only episodes inside the participant's [start, end) date range.
    if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
        parsed_data = parsed_data.loc[(parsed_data[datetime_column] >= local_start_date) & (parsed_data[datetime_column] < local_end_date)]
    # Sort by "local_start_date_time" column
    parsed_data.sort_values(by="local_start_date_time", ascending=True, inplace=True)
    # Localize to the study timezone and convert to UNIX ms; nonexistent local
    # times become NaT and those rows are removed by the dropna below.
    parsed_data["timestamp"] = parsed_data[datetime_column].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
    parsed_data.dropna(subset=['timestamp'], inplace=True)
    parsed_data.drop(["local_start_date_time", "local_end_date_time"], axis = 1, inplace=True)

if parsed_data.shape[0] > 0 and fitbit_data_type == "intraday":
    # Keep only rows inside the participant's [start, end) date range.
    if not pd.isnull(local_start_date) and not pd.isnull(local_end_date):
        parsed_data = parsed_data.loc[(parsed_data["local_date_time"] >= local_start_date) & (parsed_data["local_date_time"] < local_end_date)]
    # Sort by "local_date_time" column
    parsed_data.sort_values(by="local_date_time", ascending=True, inplace=True)
    parsed_data["start_timestamp"] = parsed_data["local_date_time"].dt.tz_localize(timezone, ambiguous=False, nonexistent="NaT").dropna().astype(np.int64) // 10**6
    parsed_data.dropna(subset=['start_timestamp'], inplace=True)
    # Inclusive end of the window in ms: start + duration*1000 - 1
    # ("duration" holds seconds — the parser writes 60, 30, or data["seconds"]).
    parsed_data["end_timestamp"] = parsed_data["start_timestamp"] + ((parsed_data["duration"] - 1) * 1000) + 999
    # unified_level: 0 for awake-like levels, 1 for asleep-like levels,
    # across both "classic" and "stages" vocabularies.
    parsed_data["unified_level"] = np.where(parsed_data["level"].isin(["awake", "restless", "wake"]), 0, 1)
    # Put consecutive rows with the same "level" field together and merge episodes
    parsed_data.insert(2, "level_episode_id", (parsed_data[["type_episode_id", "level"]] != parsed_data[["type_episode_id", "level"]].shift()).any(axis=1).cumsum())
    parsed_data = mergeSleepEpisodes(parsed_data, ["type_episode_id", "level_episode_id", "level", "unified_level", "is_main_sleep", "type"])

parsed_data.to_csv(snakemake.output[0], index=False)

View File

@ -10,7 +10,7 @@ FITBIT_HEARTRATE_SUMMARY:
HEARTRATE_DAILY_CALORIESPEAK: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_summary_json.py
@ -23,7 +23,7 @@ FITBIT_HEARTRATE_INTRADAY:
HEARTRATE_ZONE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_heartrate_intraday_json.py
@ -44,10 +44,26 @@ FITBIT_SLEEP_SUMMARY:
TYPE: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_sleep_summary_json.py
FITBIT_SLEEP_INTRADAY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
DEVICE_ID: device_id
LOCAL_DATE_TIME: FLAG_TO_MUTATE
TYPE_EPISODE_ID: FLAG_TO_MUTATE
DURATION: FLAG_TO_MUTATE
IS_MAIN_SLEEP: FLAG_TO_MUTATE
TYPE: FLAG_TO_MUTATE
LEVEL: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_sleep_intraday_json.py
FITBIT_STEPS_SUMMARY:
RAPIDS_COLUMN_MAPPINGS:
TIMESTAMP: FLAG_TO_MUTATE
@ -56,7 +72,7 @@ FITBIT_STEPS_SUMMARY:
STEPS: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_steps_summary_json.py
@ -68,6 +84,6 @@ FITBIT_STEPS_INTRADAY:
STEPS: FLAG_TO_MUTATE
MUTATION:
COLUMN_MAPPINGS:
JSON_FITBIT_COLUMN: fitbit_data # text column with JSON objects
JSON_FITBIT_COLUMN: fitbit_data # string column with JSON objects
SCRIPTS: # List any python or r scripts that mutate your raw data
- src/data/streams/mutations/fitbit/parse_steps_intraday_json.py

View File

@ -0,0 +1,142 @@
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import dateutil.parser
SLEEP_CODE2LEVEL = ["asleep", "restless", "awake"]
SLEEP_INTRADAY_COLUMNS = ("device_id",
"type_episode_id",
"duration",
# For "classic" type, original_level is one of {"awake", "restless", "asleep"}
# For "stages" type, original_level is one of {"wake", "deep", "light", "rem"}
"level",
# one of {0, 1} where 0: nap, 1: main sleep
"is_main_sleep",
# one of {"classic", "stages"}
"type",
"local_date_time",
"timestamp")
def mergeLongAndShortData(data_intraday):
    """Expand Fitbit "stages" long/short interval data into 30-second rows.

    data_intraday: dict with "data" (level intervals) and optionally
    "shortData" (brief wake intervals); each entry has "dateTime" (ISO-8601
    string), "level", and "seconds". Returns a list of [datetime, level]
    pairs in 30-second steps; any step also covered by shortData is
    relabelled "wake".

    Fixes: builds plain lists instead of the deprecated (removed in
    pandas >= 2.0) DataFrame.append-in-a-loop pattern; tolerates records
    without a "shortData" key; uses stdlib datetime.fromisoformat instead of
    dateutil for Fitbit's "YYYY-MM-DDTHH:MM:SS.mmm" strings.
    """
    window_length = 30  # seconds per expanded row

    def expand(intervals):
        # One (datetime, level) pair per 30-second window of each interval.
        rows = []
        for interval in intervals:
            start = datetime.fromisoformat(interval["dateTime"])
            for i in range(interval["seconds"] // window_length):
                rows.append((start + timedelta(seconds=i * window_length), interval["level"]))
        return rows

    long_rows = expand(data_intraday["data"])
    # "shortData" may be absent in some records; treat it as empty then.
    short_times = {dt for dt, _ in expand(data_intraday.get("shortData", []))}
    # Windows that overlap a short (wake) interval are overridden to "wake".
    return [[dt, "wake" if dt in short_times else level] for dt, level in long_rows]
# Parse one record for sleep API version 1
def parseOneRecordForV1(record, device_id, d_is_main_sleep, records_intraday, type_episode_id):
    """Parse one sleep API v1 record into per-minute intraday rows.

    Appends one tuple per entry of record["minuteData"] to records_intraday
    (shape matches SLEEP_INTRADAY_COLUMNS) and returns the list. v1 records
    only carry "HH:MM:SS" wall-clock times per minute, so the date is
    reconstructed from the episode's start/end dates.

    Fix: the start/end strings were sliced with [:18], which cut off the last
    digit of the seconds field ("...T23:58:30" became "...T23:58:3"); [:19]
    keeps the full "YYYY-MM-DDTHH:MM:SS" prefix.
    """
    sleep_record_type = "classic"
    d_start_datetime = datetime.strptime(record["startTime"][:19], "%Y-%m-%dT%H:%M:%S")
    d_end_datetime = datetime.strptime(record["endTime"][:19], "%Y-%m-%dT%H:%M:%S")
    # Intraday data
    start_date = d_start_datetime.date()
    end_date = d_end_datetime.date()
    is_before_midnight = True
    curr_date = start_date
    for data in record["minuteData"]:
        d_time = datetime.strptime(data["dateTime"], '%H:%M:%S').time()
        # For overnight episodes, switch to end_date once we cross midnight.
        if is_before_midnight and d_time.hour == 0:
            curr_date = end_date
            is_before_midnight = False  # switch only once
        d_datetime = datetime.combine(curr_date, d_time)
        # API 1.2 stores original_level as strings, so we convert the numeric
        # levels of API 1 too (1: "asleep", 2: "restless", 3: "awake").
        d_original_level = SLEEP_CODE2LEVEL[int(data["value"]) - 1]
        row_intraday = (device_id, type_episode_id, 60,
                        d_original_level, d_is_main_sleep, sleep_record_type,
                        d_datetime, 0)
        records_intraday.append(row_intraday)
    return records_intraday
# Parse one record for sleep API version 1.2
def parseOneRecordForV12(record, device_id, d_is_main_sleep, records_intraday, type_episode_id):
    """Parse one sleep API v1.2 record ("classic" or "stages") into intraday rows.

    Appends tuples shaped like SLEEP_INTRADAY_COLUMNS to records_intraday and
    returns the list. "classic" entries keep Fitbit's own variable durations;
    "stages" entries are expanded into fixed 30-second windows via
    mergeLongAndShortData.

    Fix: uses stdlib datetime.fromisoformat instead of the third-party
    dateutil parser for Fitbit's "YYYY-MM-DDTHH:MM:SS.mmm" strings.
    """
    sleep_record_type = record['type']
    if sleep_record_type == "classic":
        for data in record["levels"]["data"]:
            d_datetime = datetime.fromisoformat(data["dateTime"])
            row_intraday = (device_id, type_episode_id, data["seconds"],
                            data["level"], d_is_main_sleep, sleep_record_type,
                            d_datetime, 0)
            records_intraday.append(row_intraday)
    else:
        # Sleep type "stages": merge long and short interval streams first.
        for data in mergeLongAndShortData(record["levels"]):
            row_intraday = (device_id, type_episode_id, 30,
                            data[1], d_is_main_sleep, sleep_record_type,
                            data[0], 0)
            records_intraday.append(row_intraday)
    return records_intraday
def parseSleepData(sleep_data):
    """Parse a dataframe of raw Fitbit sleep JSON into intraday level rows.

    sleep_data: dataframe with "device_id" and "json_fitbit_column" (JSON
    strings). Payloads without a "sleep" key are skipped. Returns a dataframe
    shaped like SLEEP_INTRADAY_COLUMNS (empty input yields an empty frame).
    """
    if sleep_data.empty:
        return pd.DataFrame(columns=SLEEP_INTRADAY_COLUMNS)
    device_id = sleep_data["device_id"].iloc[0]
    records_intraday = []
    # Each JSON payload can hold several sleep episodes; number them globally.
    episode_counter = 0
    for raw_json in sleep_data.json_fitbit_column:
        payload = json.loads(raw_json)
        for record in payload.get("sleep", []):
            # Nap (0) vs main sleep (1).
            is_main = 1 if record["isMainSleep"] else 0
            # Only sleep API v1 records carry "awakeCount".
            parser = parseOneRecordForV1 if "awakeCount" in record else parseOneRecordForV12
            records_intraday = parser(record, device_id, is_main, records_intraday, episode_counter)
            episode_counter += 1
    return pd.DataFrame(data=records_intraday, columns=SLEEP_INTRADAY_COLUMNS)
def main(json_raw, stream_parameters):
    """Mutation entry point called by RAPIDS' data stream pipeline.

    json_raw: dataframe with a `json_fitbit_column` of raw Fitbit JSON strings.
    stream_parameters: stream configuration (unused by this script).
    Returns the parsed intraday sleep dataframe (SLEEP_INTRADAY_COLUMNS) with
    a placeholder `timestamp` and string-formatted `local_date_time`.
    """
    parsed_data = parseSleepData(json_raw)
    parsed_data["timestamp"] = 0 # this column is added at readable_datetime.R because we need to take into account multiple timezones
    # Serialize datetimes so downstream R code reads a plain datetime string.
    parsed_data['local_date_time'] = parsed_data['local_date_time'].dt.strftime('%Y-%m-%d %H:%M:%S')
    return(parsed_data)

View File

@ -156,7 +156,7 @@ pull_phone_data <- function(){
mutated_data <- mutate_data(mutation_scripts, renamed_data, data_configuration)
if(!setequal(expected_columns, colnames(mutated_data)))
stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The container script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One or more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
participant_data <- rbind(participant_data, mutated_data)
}

View File

@ -110,7 +110,7 @@ pull_wearable_data_main <- function(){
mutated_data <- mutate_data(mutation_scripts, renamed_data, data_configuration)
if(!setequal(expected_columns, colnames(mutated_data)))
stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The container script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One ore more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
stop(paste0("The mutated data for ", device, " does not have the columns RAPIDS expects. The mutation script returned [", paste(colnames(mutated_data), collapse=","),"] but RAPIDS expected [",paste(expected_columns, collapse=","), "]. One or more mutation scripts in [", sensor,"][MUTATION][SCRIPTS] are adding extra columns or removing or not adding the ones expected"))
participant_data <- rbind(participant_data, mutated_data)
}

View File

@ -159,6 +159,16 @@ FITBIT_SLEEP_SUMMARY:
- IS_MAIN_SLEEP
- TYPE
FITBIT_SLEEP_INTRADAY:
- TIMESTAMP
- DEVICE_ID
- LOCAL_DATE_TIME
- TYPE_EPISODE_ID
- DURATION
- IS_MAIN_SLEEP
- TYPE
- LEVEL
FITBIT_STEPS_SUMMARY:
- TIMESTAMP
- DEVICE_ID