Refactor location features from Doryab provider & add a new strategy to infer home location & fix bugs

feature/aware_sql_split
Meng Li 2021-04-09 12:05:25 -04:00
parent 32472461ec
commit 0d6f51be8b
53 changed files with 669 additions and 702 deletions

View File

@ -207,11 +207,12 @@ for provider in config["PHONE_LOCATIONS"]["PROVIDERS"].keys():
if provider == "BARNETT": if provider == "BARNETT":
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_barnett_daily.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_barnett_daily.csv", pid=config["PIDS"]))
if provider == "DORYAB":
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/raw/{pid}/phone_locations_raw.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_processed_with_datetime_with_home.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=get_script_language(config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower())) files_to_compute.extend(expand("data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv", pid=config["PIDS"], language=get_script_language(config["PHONE_LOCATIONS"]["PROVIDERS"][provider]["SRC_SCRIPT"]), provider_key=provider.lower()))
files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/phone_locations.csv", pid=config["PIDS"]))
files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"])) files_to_compute.extend(expand("data/processed/features/{pid}/all_sensor_features.csv", pid=config["PIDS"]))

View File

@ -232,24 +232,20 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: False
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome", "homelabel"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 100 # meters DBSCAN_EPS: 100 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -6,6 +6,8 @@
- Add the `EXCLUDE_SLEEP` module for steps intraday features - Add the `EXCLUDE_SLEEP` module for steps intraday features
- Fix bug when no phone data yield is needed to process location data - Fix bug when no phone data yield is needed to process location data
- Remove location rows with the same timestamp based on their accuracy - Remove location rows with the same timestamp based on their accuracy
- Refactor location features from Doryab provider
- Add a new strategy to infer home location
## v1.2.0 ## v1.2.0
- Sleep summary and intraday features are more consistent. - Sleep summary and intraday features are more consistent.
- Add wake and bedtime features for sleep summary data. - Add wake and bedtime features for sleep summary data.

View File

@ -55,10 +55,13 @@ If you computed locations features using the provider `[PHONE_LOCATIONS][BARNETT
## Doryab (locations) ## Doryab (locations)
If you computed locations features using the provider `[PHONE_LOCATIONS][DORYAB]` cite [this paper](https://arxiv.org/abs/1812.10394) and [this paper](https://doi.org/10.1145/2750858.2805845) in addition to RAPIDS. If you computed locations features using the provider `[PHONE_LOCATIONS][DORYAB]` cite [this paper](https://arxiv.org/abs/1812.10394) and [this paper](https://doi.org/10.1145/2750858.2805845) in addition to RAPIDS. In addition, if you used the `SUN_LI_VEGA_STRATEGY` strategy, cite [this paper](https://www.jmir.org/2020/9/e19992/) as well.
!!! cite "Doryab et al. citation" !!! cite "Doryab et al. citation"
Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394 Doryab, A., Chikarsel, P., Liu, X., & Dey, A. K. (2019). Extraction of Behavioral Features from Smartphone and Wearable Data. ArXiv:1812.10394 [Cs, Stat]. http://arxiv.org/abs/1812.10394
!!! cite "Canzian et al. citation" !!! cite "Canzian et al. citation"
Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp '15). Association for Computing Machinery, New York, NY, USA, 1293–1304. DOI:https://doi.org/10.1145/2750858.2805845 Luca Canzian and Mirco Musolesi. 2015. Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis. In Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp '15). Association for Computing Machinery, New York, NY, USA, 1293–1304. DOI:https://doi.org/10.1145/2750858.2805845
!!! cite "Sun et al. citation"
Sun S, Folarin AA, Ranjan Y, Rashid Z, Conde P, Stewart C, Cummins N, Matcham F, Dalla Costa G, Simblett S, Leocani L, Lamers F, Sørensen PS, Buron M, Zabalza A, Guerrero Pérez AI, Penninx BW, Siddi S, Haro JM, Myin-Germeys I, Rintala A, Wykes T, Narayan VA, Comi G, Hotopf M, Dobson RJ, RADAR-CNS Consortium. Using Smartphones and Wearable Devices to Monitor Behavioral Changes During COVID-19. J Med Internet Res 2020;22(9):e19992

View File

@ -160,6 +160,15 @@ Due to the difference in the format of the raw battery data for iOS and Android
that contains data for Android. All other files (i.e. for iPhone) that contains data for Android. All other files (i.e. for iPhone)
are empty data files. are empty data files.
## Locations
Description
- The participant's home location is (latitude=1, longitude=1).
- From Sat 10:56:00 to Sat 11:04:00, the center of the cluster is (latitude=-100, longitude=-100).
- From Sun 03:30:00 to Sun 03:47:00, the center of the cluster is (latitude=1, longitude=1). Home location is extracted from this period.
- From Sun 11:30:00 to Sun 11:38:00, the center of the cluster is (latitude=100, longitude=100).
## Application Foreground ## Application Foreground
- The raw application foreground data file contains data for 1 day. - The raw application foreground data file contains data for 1 day.

View File

@ -111,7 +111,7 @@ These features are based on the original implementation by [Doryab et al.](../..
- data/raw/{pid}/phone_locations_raw.csv - data/raw/{pid}/phone_locations_raw.csv
- data/interim/{pid}/phone_locations_processed.csv - data/interim/{pid}/phone_locations_processed.csv
- data/interim/{pid}/phone_locations_processed_with_datetime.csv - data/interim/{pid}/phone_locations_processed_with_datetime.csv
- data/interim/{pid}/phone_locations_processed_with_datetime_with_home.csv - data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv
- data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv - data/interim/{pid}/phone_locations_features/phone_locations_{language}_{provider_key}.csv
- data/processed/features/{pid}/phone_locations.csv - data/processed/features/{pid}/phone_locations.csv
``` ```
@ -127,11 +127,11 @@ Parameters description for `[PHONE_LOCATIONS][PROVIDERS][DORYAB]`:
| `[DBSCAN_EPS]` | The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function. | `[DBSCAN_EPS]` | The maximum distance in meters between two samples for one to be considered as in the neighborhood of the other. This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function.
| `[DBSCAN_MINSAMPLES]` | The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself. | `[DBSCAN_MINSAMPLES]` | The number of samples (or total weight) in a neighborhood for a point to be considered as a core point of a cluster. This includes the point itself.
| `[THRESHOLD_STATIC]` | It is the threshold value in km/hr which labels a row as Static or Moving. | `[THRESHOLD_STATIC]` | It is the threshold value in km/hr which labels a row as Static or Moving.
| `[MAXIMUM_ROW_GAP]` | The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. | `[MAXIMUM_ROW_GAP]` | The maximum gap (in seconds) allowed between any two consecutive rows for them to be considered part of the same displacement. If this threshold is too high, it can throw speed and distance calculations off for periods when the phone was not sensing. This value must be larger than your GPS sampling interval when `[LOCATIONS_TO_USE]` is `ALL` or `GPS`, otherwise all the stationary-related features will be NA. If `[LOCATIONS_TO_USE]` is `ALL_RESAMPLED` or `FUSED_RESAMPLED`, you can use the default value as every row will be resampled at 1-minute intervals.
| `[MAXIMUM_ROW_DURATION]` | The time difference between any two consecutive rows `A` and `B` is considered as the time a participant spent in `A`. If this difference is bigger than MAXIMUM_ROW_GAP we substitute it with `MAXIMUM_ROW_DURATION`.
| `[MINUTES_DATA_USED]` | Set to `True` to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough. | `[MINUTES_DATA_USED]` | Set to `True` to include an extra column in the final location feature file containing the number of minutes used to compute the features on each time segment. Use this for quality control purposes; the more data minutes exist for a period, the more reliable its features should be. For fused location, a single minute can contain more than one coordinate pair if the participant is moving fast enough.
| `[SAMPLING_FREQUENCY]` | Expected time difference between any two location rows in minutes. If set to `0`, the sampling frequency will be inferred automatically as the median of all the differences between two consecutive row timestamps (recommended if you are using `FUSED_RESAMPLED` data). This parameter impacts all the time calculations. | `[CLUSTER_ON]` | Set this flag to `PARTICIPANT_DATASET` to create clusters based on the entire participant's dataset or to `TIME_SEGMENT` to create clusters based on all the instances of the corresponding time segment (e.g. all mornings) or to `TIME_SEGMENT_INSTANCE` to create clusters based on a single instance (e.g. 2020-05-20's morning).
| `[CLUSTER_ON]` | Set this flag to `PARTICIPANT_DATASET` to create clusters based on the entire participant's dataset or to `TIME_SEGMENT` to create clusters based on all the instances of the corresponding time segment (e.g. all mornings). |`[INFER_HOME_LOCATION_STRATEGY]` | The strategy applied to infer home locations. Set to `DORYAB_STRATEGY` to infer one home location for the entire dataset of each participant or to `SUN_LI_VEGA_STRATEGY` to infer one home location per day per participant. See Observations below to know more.
|`[MINIMUM_DAYS_TO_DETECT_HOME_CHANGES]` | The minimum number of consecutive days a new home location candidate has to repeat before it is considered the participant's new home. This parameter will be used only when `[INFER_HOME_LOCATION_STRATEGY]` is set to `SUN_LI_VEGA_STRATEGY`.
| `[CLUSTERING_ALGORITHM]` | The original Doryab et al. implementation uses `DBSCAN`, `OPTICS` is also available with similar (but not identical) clustering results and lower memory consumption. | `[CLUSTERING_ALGORITHM]` | The original Doryab et al. implementation uses `DBSCAN`, `OPTICS` is also available with similar (but not identical) clustering results and lower memory consumption.
| `[RADIUS_FOR_HOME]` | All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see `timeathome` feature). | `[RADIUS_FOR_HOME]` | All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see `timeathome` feature).
@ -143,25 +143,25 @@ Features description for `[PHONE_LOCATIONS][PROVIDERS][DORYAB]`:
|locationvariance |$meters^2$ |The sum of the variances of the latitude and longitude columns. |locationvariance |$meters^2$ |The sum of the variances of the latitude and longitude columns.
|loglocationvariance | - | Log of the sum of the variances of the latitude and longitude columns. |loglocationvariance | - | Log of the sum of the variances of the latitude and longitude columns.
|totaldistance |meters |Total distance traveled in a time segment using the haversine formula. |totaldistance |meters |Total distance traveled in a time segment using the haversine formula.
|averagespeed |km/hr |Average speed in a time segment considering only the instances labeled as Moving. |avgspeed |km/hr |Average speed in a time segment considering only the instances labeled as Moving.
|varspeed |km/hr |Speed variance in a time segment considering only the instances labeled as Moving. |varspeed |km/hr |Speed variance in a time segment considering only the instances labeled as Moving.
|{--circadianmovement--} |- | Not suggested for use now; see Observations below. \ "It encodes the extent to which a person's location patterns follow a 24-hour circadian cycle.\" [Doryab et al.](../../citation#doryab-locations). |{--circadianmovement--} |- | Deprecated, see Observations below. \ "It encodes the extent to which a person's location patterns follow a 24-hour circadian cycle.\" [Doryab et al.](../../citation#doryab-locations).
|numberofsignificantplaces |places |Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place. |numberofsignificantplaces |places |Number of significant locations visited. It is calculated using the DBSCAN/OPTICS clustering algorithm which takes in EPS and MIN_SAMPLES as parameters to identify clusters. Each cluster is a significant place.
|numberlocationtransitions |transitions |Number of movements between any two clusters in a time segment. |numberlocationtransitions |transitions |Number of movements between any two clusters in a time segment.
|radiusgyration |meters |Quantifies the area covered by a participant |radiusgyration |meters |Quantifies the area covered by a participant
|timeattop1location |minutes |Time spent at the most significant location. |timeattop1location |minutes |Time spent at the most significant location.
|timeattop2location |minutes |Time spent at the 2nd most significant location. |timeattop2location |minutes |Time spent at the 2nd most significant location.
|timeattop3location |minutes |Time spent at the 3rd most significant location. |timeattop3location |minutes |Time spent at the 3rd most significant location.
|movingtostaticratio | - | Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine. These times are computed using timeInSeconds feature. |movingtostaticratio | - | Ratio between stationary time and total location sensed time. A lat/long coordinate pair is labeled as stationary if its speed (distance/time) to the next coordinate pair is less than 1km/hr. A higher value represents a more stationary routine.
|outlierstimepercent | - | Ratio between the time spent in non-significant clusters divided by the time spent in all clusters (total location sensed time). A higher value represents more time spent in non-significant clusters. These times are computed using timeInSeconds feature. |outlierstimepercent | - | Ratio of the time spent in non-significant clusters to the time spent in all clusters (i.e., stationary time, since only stationary samples are clustered). A higher value represents more time spent in non-significant clusters.
|maxlengthstayatclusters |minutes |Maximum time spent in a cluster (significant location). |maxlengthstayatclusters |minutes |Maximum time spent in a cluster (significant location).
|minlengthstayatclusters |minutes |Minimum time spent in a cluster (significant location). |minlengthstayatclusters |minutes |Minimum time spent in a cluster (significant location).
|meanlengthstayatclusters |minutes |Average time spent in a cluster (significant location). |avglengthstayatclusters |minutes |Average time spent in a cluster (significant location).
|stdlengthstayatclusters |minutes |Standard deviation of time spent in a cluster (significant location). |stdlengthstayatclusters |minutes |Standard deviation of time spent in a cluster (significant location).
|locationentropy |nats |Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). |locationentropy |nats |Shannon Entropy computed over the row count of each cluster (significant location), it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location).
|normalizedlocationentropy |nats |Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location). |normalizedlocationentropy |nats |Shannon Entropy computed over the row count of each cluster (significant location) divided by the number of clusters; it is higher the more rows belong to a cluster (i.e., the more time a participant spent at a significant location).
|timeathome |minutes | Time spent at home (see Observations below for a description on how we compute home). |timeathome |minutes | Time spent at home (see Observations below for a description on how we compute home).
|homelabel |- | An integer that represents a different home location. It will be a constant number (1) for all participants when `[INFER_HOME_LOCATION_STRATEGY]` is set to `DORYAB_STRATEGY` or an incremental index if the strategy is set to `SUN_LI_VEGA_STRATEGY`.
!!! note "Assumptions/Observations" !!! note "Assumptions/Observations"
**Significant Locations Identified** **Significant Locations Identified**
@ -174,7 +174,25 @@ Features description for `[PHONE_LOCATIONS][PROVIDERS][DORYAB]`:
Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that `EPS/MAX_EPS`=100 produced closer clustering results to reality. Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set `EPS` based on your location data's accuracy (the more accurate your data is, the lower you should be able to set EPS). Based on an experiment where we collected fused location data for 7 days with a mean accuracy of 86 & SD of 350.874635, we determined that `EPS/MAX_EPS`=100 produced closer clustering results to reality. Higher values (>100) missed out on some significant places, like a short grocery visit, while lower values (<100) picked up traffic lights and stop signs while driving as significant locations. We recommend you set `EPS` based on your location data's accuracy (the more accurate your data is, the lower you should be able to set EPS).
**Duration Calculation** **Duration Calculation**
To calculate the time duration component for our features, we compute the difference between consecutive rows' timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with a maximum duration (60 seconds by default, i.e., we assume a participant spent at least 60 seconds in their last known location) To calculate the time duration component for our features, we compute the difference between consecutive rows' timestamps to take into account sampling rate variability. If this time difference is larger than a threshold (300 seconds by default), we replace it with NA and label that row as Moving.
**Home location** **Home location**
Home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm (`DB_SCAN` or `OPTICS`) and considering the center of the biggest cluster home for that participant.
- `DORYAB_STRATEGY`: home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm (`DBSCAN` or `OPTICS`) and considering the center of the biggest cluster home for that participant.
- `SUN_LI_VEGA_STRATEGY`: home is calculated using all location data of a participant between 12 am and 6 am, then applying a clustering algorithm (`DBSCAN` or `OPTICS`). The following steps are used to infer the home location per day for that participant:
1. if there are records within [03:30:00, 04:30:00] for that night:<br>
&nbsp;&nbsp;&nbsp;&nbsp;we choose the most common cluster during that period as a home candidate for that day.<br>
elif there are records within [midnight, 03:30:00) for that night:<br>
&nbsp;&nbsp;&nbsp;&nbsp;we choose the last valid cluster during that period as a home candidate for that day.<br>
elif there are records within (04:30:00, 06:00:00] for that night:<br>
&nbsp;&nbsp;&nbsp;&nbsp;we choose the first valid cluster during that period as a home candidate for that day.<br>
else:<br>
&nbsp;&nbsp;&nbsp;&nbsp;the home location is NA (missing) for that day.
2. If the count of consecutive days with the same candidate home location cluster label is greater than or equal to `[MINIMUM_DAYS_TO_DETECT_HOME_CHANGES]`,
the candidate will be regarded as the home cluster; otherwise, the home cluster will be the last valid day's cluster.
If there are no valid clusters before that day, the first home location in the days after is used.

View File

@ -215,24 +215,20 @@ PHONE_LOCATIONS:
LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: FUSED_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: True COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 51 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -21,6 +21,12 @@ def get_barnett_daily(wildcards):
return "data/interim/{pid}/phone_locations_barnett_daily.csv" return "data/interim/{pid}/phone_locations_barnett_daily.csv"
return [] return []
def get_locations_python_input(wildcards):
    """Return the sensor-data input path template for the Python location features rule.

    The provider key is compared case-insensitively: the DORYAB provider reads
    the file that carries the extra Doryab columns, while every other provider
    reads the plain datetime-processed locations file.

    :param wildcards: object exposing a ``provider_key`` string attribute
    :return: a path template; the ``{pid}`` placeholder is returned unformatted
    """
    if wildcards.provider_key.upper() == "DORYAB":
        return "data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv"
    # Any non-DORYAB provider does not need the extra columns
    return "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
def find_features_files(wildcards): def find_features_files(wildcards):
feature_files = [] feature_files = []
for provider_key, provider in config[(wildcards.sensor_key).upper()]["PROVIDERS"].items(): for provider_key, provider in config[(wildcards.sensor_key).upper()]["PROVIDERS"].items():

View File

@ -366,9 +366,27 @@ rule phone_light_r_features:
script: script:
"../src/features/entry.R" "../src/features/entry.R"
# Runs add_doryab_extra_columns.py on the datetime-processed locations file and
# writes the "_with_doryab_columns" file for the same participant ({pid}).
rule phone_locations_add_doryab_extra_columns:
input:
sensor_input = "data/interim/{pid}/phone_locations_processed_with_datetime.csv",
params:
# Every parameter below is read from config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]
accuracy_limit = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["ACCURACY_LIMIT"],
maximum_row_gap = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["MAXIMUM_ROW_GAP"],
dbscan_eps = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["DBSCAN_EPS"],
dbscan_minsamples = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["DBSCAN_MINSAMPLES"],
threshold_static = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["THRESHOLD_STATIC"],
clustering_algorithm = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["CLUSTERING_ALGORITHM"],
cluster_on = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["CLUSTER_ON"],
infer_home_location_strategy = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["INFER_HOME_LOCATION_STRATEGY"],
minimum_days_to_detect_home_changes = config["PHONE_LOCATIONS"]["PROVIDERS"]["DORYAB"]["MINIMUM_DAYS_TO_DETECT_HOME_CHANGES"]
output:
"data/interim/{pid}/phone_locations_processed_with_datetime_with_doryab_columns.csv"
script:
"../src/features/phone_locations/doryab/add_doryab_extra_columns.py"
rule phone_locations_python_features: rule phone_locations_python_features:
input: input:
sensor_data = "data/interim/{pid}/phone_locations_processed_with_datetime_with_home.csv", sensor_data = get_locations_python_input,
time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv" time_segments_labels = "data/interim/time_segments/{pid}_time_segments_labels.csv"
params: params:
provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()], provider = lambda wildcards: config["PHONE_LOCATIONS"]["PROVIDERS"][wildcards.provider_key.upper()],

View File

@ -121,19 +121,6 @@ rule phone_locations_processed_with_datetime:
script: script:
"../src/data/datetime/readable_datetime.R" "../src/data/datetime/readable_datetime.R"
# Runs infer_home_location.py on the datetime-processed location data of one participant.
# Every parameter is read from config["PHONE_LOCATIONS"]["HOME_INFERENCE"].
rule phone_locations_processed_with_datetime_with_home:
    input:
        sensor_input = "data/interim/{pid}/phone_locations_processed_with_datetime.csv"
    params:
        dbscan_eps = config["PHONE_LOCATIONS"]["HOME_INFERENCE"]["DBSCAN_EPS"],
        dbscan_minsamples = config["PHONE_LOCATIONS"]["HOME_INFERENCE"]["DBSCAN_MINSAMPLES"],
        threshold_static = config["PHONE_LOCATIONS"]["HOME_INFERENCE"]["THRESHOLD_STATIC"],
        clustering_algorithm = config["PHONE_LOCATIONS"]["HOME_INFERENCE"]["CLUSTERING_ALGORITHM"]
    output:
        "data/interim/{pid}/phone_locations_processed_with_datetime_with_home.csv"
    script:
        "../src/data/infer_home_location.py"
rule resample_episodes: rule resample_episodes:
input: input:
"data/interim/{pid}/{sensor}_episodes.csv" "data/interim/{pid}/{sensor}_episodes.csv"

View File

@ -1,140 +0,0 @@
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN,OPTICS
from math import radians, cos, sin, asin, sqrt
def filterDatafromDf(origDf):
    """Return only the rows logged between midnight and 6am.

    The window ends right before 06:00, so rows whose ``local_hour`` is 6
    (06:00-06:59) are excluded.
    Assumes ``local_hour`` is the hour of the day (0-23) -- TODO confirm.

    :param origDf: location data with a ``local_hour`` column
    :return: the subset of ``origDf`` whose ``local_hour`` is lower than 6
    """
    return origDf[origDf['local_hour'] < 6]
def distance_to_degrees(d):
    """Approximate a distance as degrees by dividing it by 1852 and then by 60.

    This is only an approximation, but it speeds up clustering by a huge
    amount and does not introduce much error over small distances.
    """
    return d / 1852 / 60
def cluster_and_label(df, clustering_algorithm, threshold_static, **kwargs):
    """Cluster the coordinates of ``df`` and rank the clusters by size.

    :param df: location data with "double_latitude", "double_longitude" and
        "timestamp" columns, plus either a DatetimeIndex or a
        "local_date_time" column that is used as the index
    :param clustering_algorithm: "DBSCAN" uses sklearn's DBSCAN; any other
        value uses sklearn's OPTICS
    :param threshold_static: speed threshold forwarded to ``mark_moving``
    :param kwargs: arguments for the sklearn clusterer
    :return: ``df`` itself when it is empty; otherwise the data returned by
        ``mark_moving`` (every row kept, moving rows are only flagged in
        "stationary_or_not") with a "location_label" column where 1 is the
        largest cluster, 2 the second largest and so on, and -1 marks the
        rows that do not belong to any cluster
    """
    if df.empty:
        return df

    location_data = df
    if not isinstance(df.index, pd.DatetimeIndex):
        location_data = df.set_index("local_date_time")

    stationary = mark_moving(location_data, threshold_static)

    # Cluster each unique coordinate once; its number of rows is the weight
    counts_df = stationary[["double_latitude", "double_longitude"]].groupby(["double_latitude", "double_longitude"]).size().reset_index()
    counts = counts_df[0]
    lat_lon = counts_df[["double_latitude", "double_longitude"]].values

    if clustering_algorithm == "DBSCAN":
        clusterer = DBSCAN(**kwargs)
        cluster_results = clusterer.fit_predict(lat_lon, sample_weight=counts)
    else:
        # OPTICS is fitted without sample weights
        clusterer = OPTICS(**kwargs)
        cluster_results = clusterer.fit_predict(lat_lon)

    # Extend the labels back to the original rows, without the count column
    counts_df["location_label"] = cluster_results
    del counts_df[0]
    merged = pd.merge(stationary, counts_df, on=["double_latitude", "double_longitude"])

    # Rank the clusters by the number of rows they hold, ignoring outliers (-1)
    cluster_results = merged["location_label"].values
    valid_clusters = cluster_results[np.where(cluster_results != -1)]
    label_map = rank_count_map(valid_clusters)

    # The labels are assigned by position, a single time
    return stationary.assign(location_label=merged["location_label"].map(label_map).values)
def rank_count_map(clusters):
    """Return a function that maps a cluster label to its popularity rank.

    The most common label in ``clusters`` maps to 1, the second most common
    to 2 and so on; labels with the same count are ranked from the largest
    label to the smallest. A label that is not in ``clusters`` maps to -1.
    """
    labels, counts = np.unique(clusters, return_counts=True)
    most_common_first = sorted(zip(counts, labels), reverse=True)
    label_to_rank = {label: rank for rank, (_, label) in enumerate(most_common_first, start=1)}

    def to_rank(x):
        return label_to_rank.get(x, -1)

    return to_rank
def mark_moving(df, threshold_static):
    """Flag every row as stationary (1) or moving (0).

    The speed of a row is the distance to the next row divided by the time to
    the next row. The last row has no next row, so it is flagged as moving.

    :param df: location data with "double_longitude", "double_latitude" and
        "timestamp" columns ("timestamp" presumably in milliseconds, given the
        conversion below -- TODO confirm)
    :param threshold_static: rows slower than this speed (in km/h) are stationary
    :return: a copy of ``df`` sorted by index with a "stationary_or_not" column
    """
    # is_monotonic was removed in pandas 2.0; is_monotonic_increasing is its equivalent
    if not df.index.is_monotonic_increasing:
        df = df.sort_index()
    else:
        # Work on a copy so the caller's data is never modified
        df = df.copy()

    # haversine returns meters, convert to kilometers
    distance = haversine(df.double_longitude, df.double_latitude, df.double_longitude.shift(-1), df.double_latitude.shift(-1)) / 1000
    # Time to the next row, in hours
    time = (df.timestamp.diff(-1) * -1) / (1000 * 60 * 60)
    df['stationary_or_not'] = np.where((distance / time) < threshold_static, 1, 0)  # 1 being stationary, 0 for moving
    return df
def haversine(lon1, lat1, lon2, lat2):
    """
    Return the great circle distance, in meters, between two points
    on the earth (specified in decimal degrees)
    """
    earth_radius_km = 6371  # Use 3956 for miles

    # Work in radians
    lon1, lat1, lon2, lat2 = np.radians([lon1, lat1, lon2, lat2])

    # Haversine formula
    half_lat_term = np.sin((lat2 - lat1) / 2.0) ** 2
    half_lon_term = np.sin((lon2 - lon1) / 2.0) ** 2
    a = half_lat_term + np.cos(lat1) * np.cos(lat2) * half_lon_term
    half_angle = np.arcsin(np.sqrt(a))

    # Kilometers to meters
    return earth_radius_km * 2 * half_angle * 1000
# Infer a participant's home location from the records returned by filterDatafromDf
origDf = pd.read_csv(snakemake.input[0])
filteredDf = filterDatafromDf(origDf)

if filteredDf.empty:
    # There are no records to infer a home from. Write an empty table with the same
    # layout as the regular output: no index column and a distancefromhome column
    finalDf = filteredDf.copy()
    finalDf.insert(len(finalDf.columns) - 1, 'distancefromhome', np.nan)
    finalDf.to_csv(snakemake.output[0], index=False)
else:
    dbscan_eps = snakemake.params["dbscan_eps"]
    dbscan_minsamples = snakemake.params["dbscan_minsamples"]
    threshold_static = snakemake.params["threshold_static"]
    clustering_algorithm = snakemake.params["clustering_algorithm"]

    if clustering_algorithm == "DBSCAN":
        hyperparameters = {'eps': distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples}
    elif clustering_algorithm == "OPTICS":
        hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric': 'euclidean', 'cluster_method': 'dbscan'}
    else:
        raise ValueError("config[PHONE_LOCATIONS][HOME_INFERENCE][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided {}".format(clustering_algorithm))

    filteredDf = cluster_and_label(filteredDf, clustering_algorithm, threshold_static, **hyperparameters)

    # Home is the mean coordinate of the largest cluster (label 1)
    home = filteredDf[filteredDf['location_label'] == 1][['double_latitude', 'double_longitude']].mean()
    origDf['home_latitude'] = home['double_latitude']
    origDf['home_longitude'] = home['double_longitude']

    distanceFromHome = haversine(origDf.double_longitude, origDf.double_latitude, origDf.home_longitude, origDf.home_latitude)
    finalDf = origDf.drop(['home_latitude', 'home_longitude'], axis=1)
    # distancefromhome becomes the second to last column
    finalDf.insert(len(finalDf.columns) - 1, 'distancefromhome', distanceFromHome)
    finalDf.to_csv(snakemake.output[0], index=False)

View File

@ -0,0 +1,138 @@
import warnings
import numpy as np
import pandas as pd
from doryab_clustering import haversine, create_clustering_hyperparameters, cluster
# Add "is_stationary" column to denote whether it is stationary or not
# "distance" and "speed" columns are also added
def mark_as_stationary(location_data, threshold_static):
    """Return location_data with "distance", "speed" and "is_stationary" columns.

    "distance" is the haversine distance (in meters) to the next row, "speed"
    is in km/h and "is_stationary" is 1 when the speed is lower than
    threshold_static and 0 otherwise. Rows without a duration are dropped.
    """
    next_longitude = location_data["double_longitude"].shift(-1)
    next_latitude = location_data["double_latitude"].shift(-1)
    meters_to_next_row = haversine(location_data["double_longitude"], location_data["double_latitude"], next_longitude, next_latitude)
    marked = location_data.assign(distance=meters_to_next_row)

    # m/s to km/h; an infinite speed is treated as missing
    meters_per_second = marked["distance"] / marked["duration_in_seconds"]
    marked.loc[:, "speed"] = meters_per_second.replace(np.inf, np.nan) * 3.6

    below_threshold = marked["speed"] < threshold_static
    marked.loc[:, "is_stationary"] = np.where(below_threshold, 1, 0)

    return marked.dropna(subset=["duration_in_seconds"])
def infer_home_location(location_data, clustering_algorithm, hyperparameters, strategy, days_threshold):
    """Add "distance_from_home" and "home_label" columns to the location data.

    Home locations are inferred from the records logged from midnight to 6am
    (local_hour < 6). A home location is the mean coordinate of a home cluster.

    DORYAB_STRATEGY assumes the participant does not change the home location
    during the whole study: the most common cluster of all nights is the home
    cluster and every row gets home_label 1.

    SUN_LI_VEGA_STRATEGY assumes the home location might change. Each night
    gets a candidate home cluster based on the following rules:
        if there are records within [03:30:00, 04:30:00]: (group 1)
            the most common cluster during that period is the candidate.
        elif there are records within [midnight, 03:30:00): (group 2)
            the last valid cluster during that period is the candidate.
        elif there are records later than 04:30:00: (group 3)
            the first valid cluster during that period is the candidate.
        else:
            the home location is NA (missing) for that night.
    If the count of consecutive days with the same candidate is larger or equal
    to days_threshold, the candidate is the home cluster; otherwise the home
    cluster is the last valid day's cluster (or the next valid day's cluster
    when there is no valid day before).

    :param location_data: output of mark_as_stationary
    :param clustering_algorithm: forwarded to cluster()
    :param hyperparameters: keyword arguments forwarded to cluster()
    :param strategy: "DORYAB_STRATEGY" or "SUN_LI_VEGA_STRATEGY"
    :param days_threshold: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES
    :return: a new DataFrame with the two extra columns; an empty DataFrame
        (with the extra columns) when there are no records from midnight to 6am
    :raises ValueError: if strategy is not one of the two supported values
    """
    if strategy not in ("DORYAB_STRATEGY", "SUN_LI_VEGA_STRATEGY"):
        raise ValueError("INFER_HOME_LOCATION_STRATEGY only accepts DORYAB_STRATEGY or SUN_LI_VEGA_STRATEGY but you provided {}".format(strategy))

    location_data_filtered = location_data[location_data["local_hour"] < 6]
    if location_data_filtered.empty:
        warnings.warn("We could not infer a home location because there are no location records logged during midnight to 6am.")
        return pd.DataFrame(columns=location_data_filtered.columns.tolist() + ["distance_from_home", "home_label"])

    location_data_filtered = cluster(location_data_filtered, clustering_algorithm, **hyperparameters)

    if strategy == "DORYAB_STRATEGY":
        home_location = location_data_filtered[location_data_filtered["cluster_label"] == 1][["double_latitude", "double_longitude"]].mean()
        number_of_rows = location_data.shape[0]
        # assign() returns a new frame, the caller's data is left untouched
        return location_data.assign(
            distance_from_home=haversine(location_data["double_longitude"], location_data["double_latitude"], [home_location["double_longitude"]] * number_of_rows, [home_location["double_latitude"]] * number_of_rows),
            home_label=1,
        )

    # SUN_LI_VEGA_STRATEGY
    def night_group(local_time):
        # 1: [03:30:00, 04:30:00], 2: [midnight, 03:30:00), 3: later than 04:30:00
        if "03:30:00" <= local_time <= "04:30:00":
            return 1
        return 2 if local_time < "03:30:00" else 3

    # Ignore outliers and unclustered rows. The copy lets us add a column
    # without writing into a slice of another frame
    location_data_filtered = location_data_filtered[~location_data_filtered["cluster_label"].isin([-1, np.nan])].copy()
    location_data_filtered["group"] = location_data_filtered["local_time"].apply(night_group)

    # Select the smallest group number per day
    selected_groups = location_data_filtered[location_data_filtered["group"] == location_data_filtered.groupby("local_date")["group"].transform("min")][["group", "local_date", "cluster_label"]]
    # For group 1: the most common cluster
    group_1 = selected_groups[selected_groups["group"] == 1]
    home_clusters_group_1 = group_1.groupby(["local_date"]).agg(lambda x: pd.Series.mode(x)[0])
    # For group 2: the last cluster
    group_2 = selected_groups[selected_groups["group"] == 2]
    home_clusters_group_2 = group_2.groupby(["local_date"]).last()
    # For group 3: the first cluster
    group_3 = selected_groups[selected_groups["group"] == 3]
    home_clusters_group_3 = group_3.groupby(["local_date"]).first()

    home_clusters = pd.concat([home_clusters_group_1, home_clusters_group_2, home_clusters_group_3]).sort_index()

    # Count the consecutive days with the same candidate home location cluster label.
    # NOTE: consecutive means consecutive rows of home_clusters, nights without a
    # candidate have no row here
    home_clusters["number_of_days"] = home_clusters.groupby((home_clusters["cluster_label"] != home_clusters["cluster_label"].shift(1)).cumsum())["cluster_label"].transform("count")

    # Assign the missing days with (1) the last valid day's cluster first and (2) the next valid day's cluster then
    home_clusters.loc[home_clusters["number_of_days"] < days_threshold, "cluster_label"] = np.nan
    location_data = location_data.merge(home_clusters[["cluster_label"]], left_on="local_date", right_index=True, how="left")
    # ffill()/bfill() replace the deprecated fillna(method=...)
    location_data["cluster_label"] = location_data["cluster_label"].ffill().bfill()

    center_per_cluster = location_data_filtered.groupby(["cluster_label"])[["double_latitude", "double_longitude"]].mean().rename(columns={"double_latitude": "home_latitude", "double_longitude": "home_longitude"})
    location_data = location_data.merge(center_per_cluster, left_on="cluster_label", right_index=True, how="left")
    location_data["distance_from_home"] = haversine(location_data["double_longitude"], location_data["double_latitude"], location_data["home_longitude"], location_data["home_latitude"])

    # Reorder cluster labels: homes are numbered from 1 in order of appearance
    reorder_mapping = {old_label: idx + 1 for idx, old_label in enumerate(location_data["cluster_label"].unique())}
    location_data["home_label"] = location_data["cluster_label"].map(reorder_mapping)

    location_data.drop(["cluster_label", "home_longitude", "home_latitude"], axis=1, inplace=True)
    return location_data
# Add the duration, distance, speed, stationary, home and (optionally) cluster columns
# to the location data of a participant
location_data = pd.read_csv(snakemake.input["sensor_input"])
accuracy_limit = snakemake.params["accuracy_limit"]
maximum_row_gap = snakemake.params["maximum_row_gap"]
dbscan_eps = snakemake.params["dbscan_eps"]
dbscan_minsamples = snakemake.params["dbscan_minsamples"]
threshold_static = snakemake.params["threshold_static"]
clustering_algorithm = snakemake.params["clustering_algorithm"]
cluster_on = snakemake.params["cluster_on"]
strategy = snakemake.params["infer_home_location_strategy"]
days_threshold = snakemake.params["minimum_days_to_detect_home_changes"]

# Keep the rows whose accuracy value is lower than ACCURACY_LIMIT. The copy lets us
# add columns below without writing into a slice of the original data
rows_before_accuracy_filter = len(location_data)
location_data = location_data[location_data["accuracy"] < accuracy_limit].copy()
if rows_before_accuracy_filter > 0 and len(location_data) == 0:
    warnings.warn("Cannot compute Doryab location features because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))

# is_monotonic was removed in pandas 2.0; is_monotonic_increasing is its equivalent
if not location_data["timestamp"].is_monotonic_increasing:
    location_data = location_data.sort_values(by=["timestamp"])

# Seconds until the next row; gaps of MAXIMUM_ROW_GAP or more are treated as missing
location_data["duration_in_seconds"] = -1 * location_data["timestamp"].diff(-1) / 1000
location_data.loc[location_data["duration_in_seconds"] >= maximum_row_gap, "duration_in_seconds"] = np.nan

location_data = mark_as_stationary(location_data, threshold_static)

hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)
location_data_with_doryab_columns = infer_home_location(location_data, clustering_algorithm, hyperparameters, strategy, days_threshold)

# Only PARTICIPANT_DATASET is clustered here
if cluster_on == "PARTICIPANT_DATASET":
    location_data_with_doryab_columns = cluster(location_data_with_doryab_columns, clustering_algorithm, **hyperparameters)

location_data_with_doryab_columns.to_csv(snakemake.output[0], index=False)

View File

@ -0,0 +1,77 @@
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN, OPTICS
# Calculate the great-circle distance (in meters) between two points on the earth (specified in decimal degrees)
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in meters between (lon1, lat1) and (lon2, lat2), given in decimal degrees."""
    # Radius of earth in kilometers. Use 3956 for miles
    r = 6371

    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = np.radians([lon1, lat1, lon2, lat2])

    # Haversine formula, one term at a time
    latitude_term = np.sin((lat2 - lat1) / 2.0) ** 2
    longitude_term = np.sin((lon2 - lon1) / 2.0) ** 2
    haversine_value = latitude_term + np.cos(lat1) * np.cos(lat2) * longitude_term

    # The trailing factor converts kilometers to meters
    return r * 2 * np.arcsin(np.sqrt(haversine_value)) * 1000
# Just an approximation, but speeds up clustering by a huge amount and doesn't introduce much error over small distances
# Reference: https://jonisalonen.com/2014/computing-distance-between-coordinates-can-be-simple-and-fast/
def meters_to_degrees(distance):
    """Approximate a distance in meters as degrees."""
    meters_per_nautical_mile = 1852
    nautical_miles_per_degree = 60
    return distance / meters_per_nautical_mile / nautical_miles_per_degree
# Relabel clusters: -1 denotes the outliers (insignificant or rarely visited locations), 1 denotes the most visited significant location, 2 denotes the 2nd most significant location,...
def label(location_data):
    """Relabel "cluster_label" so that 1 is the cluster with most rows, 2 the second and so on.

    Outliers keep the -1 label and rows without a cluster label stay missing.
    The relabeled column is returned as the last column of a new DataFrame.
    """
    # Outliers (cluster_label = -1) are excluded from the counts
    rows_per_cluster = location_data["cluster_label"].replace(-1, np.nan).value_counts(ascending=False, sort=True)
    label2count = pd.DataFrame({"count": rows_per_cluster})

    # value_counts() already ordered the clusters, so the row number is the new label
    label2count["new_cluster_label"] = np.arange(1, len(label2count) + 1)
    # Outliers are still denoted by -1
    label2count.loc[-1, "new_cluster_label"] = -1

    # Swap the old labels for the new ones
    relabeled = location_data.merge(label2count[["new_cluster_label"]], left_on="cluster_label", right_index=True, how="left")
    relabeled = relabeled.drop(columns=["cluster_label"])
    return relabeled.rename(columns={"new_cluster_label": "cluster_label"})
def create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples):
    """Build the keyword arguments for the sklearn clusterer.

    :param clustering_algorithm: "DBSCAN" or "OPTICS"
    :param dbscan_eps: maximum distance in meters, converted to degrees with meters_to_degrees
    :param dbscan_minsamples: used as "min_samples" for both algorithms
    :return: a dict of keyword arguments for DBSCAN or OPTICS
    :raises ValueError: if clustering_algorithm is not DBSCAN or OPTICS, so a
        misspelled value is not silently treated as OPTICS
    """
    if clustering_algorithm not in ("DBSCAN", "OPTICS"):
        raise ValueError("CLUSTERING_ALGORITHM only accepts DBSCAN or OPTICS but you provided {}".format(clustering_algorithm))

    if clustering_algorithm == "DBSCAN":
        hyperparameters = {"eps": meters_to_degrees(dbscan_eps), "min_samples": dbscan_minsamples}
    else:  # OPTICS
        hyperparameters = {"max_eps": meters_to_degrees(dbscan_eps), "min_samples": dbscan_minsamples, "metric": "euclidean", "cluster_method": "dbscan"}
    return hyperparameters
# Only stationary samples are clustered, hence moving samples are labeled with NA
def cluster(location_data, clustering_algorithm, **kwargs):
    """Add a "cluster_label" column to the location data.

    Only stationary samples (is_stationary == 1) are clustered, hence moving
    samples are labeled with NA. Labels are ranked by label(): 1 is the most
    visited cluster and -1 denotes the outliers.

    :param location_data: location data with "double_latitude",
        "double_longitude" and "is_stationary" columns
    :param clustering_algorithm: "DBSCAN" uses sklearn's DBSCAN; any other
        value uses sklearn's OPTICS
    :param kwargs: keyword arguments for the clusterer, must include "min_samples"
    :return: a new DataFrame with the extra "cluster_label" column; when
        location_data is empty, an empty DataFrame with the same columns plus
        whichever of "is_stationary" and "cluster_label" are missing
    """
    if location_data.empty:
        # Do not repeat a column that is already there: a duplicated
        # "is_stationary" header would end up in the output file
        missing_columns = [column for column in ("is_stationary", "cluster_label") if column not in location_data.columns]
        return pd.DataFrame(columns=location_data.columns.tolist() + missing_columns)

    # Only keep stationary samples for clustering
    stationary_data = location_data[location_data["is_stationary"] == 1][["double_latitude", "double_longitude", "is_stationary"]]

    # Remove duplicates and apply sample_weight (only available for DBSCAN currently) to reduce memory usage
    stationary_data_dedup = stationary_data.groupby(["double_latitude", "double_longitude", "is_stationary"]).size().reset_index()
    lat_lon = stationary_data_dedup[["double_latitude", "double_longitude"]].values

    if stationary_data_dedup.shape[0] < kwargs["min_samples"]:
        # Fewer unique coordinates than min_samples: label everything as an outlier without fitting
        cluster_results = np.array([-1] * stationary_data_dedup.shape[0])
    elif clustering_algorithm == "DBSCAN":
        clusterer = DBSCAN(**kwargs)
        cluster_results = clusterer.fit_predict(lat_lon, sample_weight=stationary_data_dedup[0])
    else:  # OPTICS
        clusterer = OPTICS(**kwargs)
        cluster_results = clusterer.fit_predict(lat_lon)

    # Add cluster labels; the left merge leaves moving samples without a label
    stationary_data_dedup["cluster_label"] = cluster_results
    location_data_with_labels = label(location_data.merge(stationary_data_dedup[["double_latitude", "double_longitude", "is_stationary", "cluster_label"]], how="left", on=["double_latitude", "double_longitude", "is_stationary"]))
    return location_data_with_labels

View File

@ -1,427 +1,156 @@
import pandas as pd
import numpy as np import numpy as np
import warnings import pandas as pd
from astropy.timeseries import LombScargle from phone_locations.doryab.doryab_clustering import haversine, create_clustering_hyperparameters, cluster
from sklearn.cluster import DBSCAN,OPTICS
from math import radians, cos, sin, asin, sqrt
def apply_cluster_strategy(location_data, time_segment, clustering_algorithm, dbscan_eps, dbscan_minsamples, cluster_on, filter_data_by_segment):
    """Filter the location data by time segment and cluster it as requested by cluster_on.

    :param location_data: location data with the doryab extra columns
    :param time_segment: time segment forwarded to filter_data_by_segment
    :param clustering_algorithm: forwarded to create_clustering_hyperparameters and cluster
    :param dbscan_eps: forwarded to create_clustering_hyperparameters
    :param dbscan_minsamples: forwarded to create_clustering_hyperparameters
    :param cluster_on: "PARTICIPANT_DATASET" (data is only filtered; the
        clusters are created in add_doryab_extra_columns.py), "TIME_SEGMENT"
        (the filtered data is clustered as a whole) or "TIME_SEGMENT_INSTANCE"
        (every "local_segment" group is clustered on its own)
    :param filter_data_by_segment: callable(location_data, time_segment)
    :return: the filtered (and clustered) location data
    :raises ValueError: if cluster_on is not one of the three supported values
    """
    if cluster_on not in ("PARTICIPANT_DATASET", "TIME_SEGMENT", "TIME_SEGMENT_INSTANCE"):
        raise ValueError("CLUSTER_ON only accepts PARTICIPANT_DATASET, TIME_SEGMENT or TIME_SEGMENT_INSTANCE but you provided {}".format(cluster_on))

    hyperparameters = create_clustering_hyperparameters(clustering_algorithm, dbscan_eps, dbscan_minsamples)

    # Every strategy starts from the rows of the requested time segment
    location_data = filter_data_by_segment(location_data, time_segment)

    if cluster_on == "PARTICIPANT_DATASET":
        return location_data
    if cluster_on == "TIME_SEGMENT":
        return cluster(location_data, clustering_algorithm, **hyperparameters)

    # TIME_SEGMENT_INSTANCE: cluster each instance and concatenate once at the end
    clustered_instances = [
        cluster(instance_data, clustering_algorithm, **hyperparameters)
        for _, instance_data in location_data.groupby(["local_segment"])
    ]
    if not clustered_instances:
        return pd.DataFrame()
    # The last instance goes first, as when each result is prepended to the previous ones
    return pd.concat(clustered_instances[::-1])
def distance_and_speed_features(moving_data):
    """Return the total distance and the mean and variance of the speed per "local_segment"."""
    return moving_data.groupby(["local_segment"]).agg(
        totaldistance=("distance", "sum"),
        avgspeed=("speed", "mean"),
        varspeed=("speed", "var"),
    )
def radius_of_gyration(location_data):
    """Return the radius of gyration per "local_segment".

    The center is the centroid of the places visited during a segment
    instance, not the home location. Each cluster is weighted by the time
    spent in it.
    """
    # One row per cluster of a segment: its mean coordinate and the time spent in it
    clusters = location_data.groupby(["local_segment", "cluster_label"]).agg(
        double_latitude=("double_latitude", "mean"),
        double_longitude=("double_longitude", "mean"),
        time_in_a_cluster=("duration_in_seconds", "sum"),
    ).reset_index()

    # Centroid of the cluster coordinates of each segment
    centroids = clusters.groupby(["local_segment"], sort=False)[["double_latitude", "double_longitude"]].transform("mean")
    clusters[["centroid_double_latitude", "centroid_double_longitude"]] = centroids

    distance_to_centroid = haversine(clusters["double_longitude"], clusters["double_latitude"], clusters["centroid_double_longitude"], clusters["centroid_double_latitude"])
    clusters["distance_squared"] = distance_to_centroid ** 2
    clusters["distance_squared_X_time_in_a_cluster"] = clusters["distance_squared"] * clusters["time_in_a_cluster"]

    weighted_distance = clusters.groupby(["local_segment"])["distance_squared_X_time_in_a_cluster"].sum()
    # A total time of 0 is replaced by infinity so the division yields 0
    total_time = clusters.groupby(["local_segment"])["time_in_a_cluster"].sum().replace(0, np.inf)
    return np.sqrt(weighted_distance / total_time)
def cluster_stay(x, stay_at_clusters, cluster_n):
    """Return the value of x that belongs to cluster cluster_n, or None.

    The rows of x are matched by index to the "cluster_label" column of
    stay_at_clusters. None is returned unless exactly one row matches.
    """
    belongs_to_cluster = stay_at_clusters.loc[x.index, "cluster_label"] == cluster_n
    matches = x[belongs_to_cluster]
    if len(matches) != 1:
        return None
    return matches.iloc[0]
def stay_at_topn_clusters(location_data):
    """Return, per "local_segment", the minutes at the top 3 clusters and the max/min/avg/std stay in a cluster.

    Missing values are filled with 0.
    """
    # Total time per cluster of a segment, in seconds and in minutes
    duration_columns = location_data[["local_segment", "cluster_label", "duration_in_seconds"]]
    stay_at_clusters = duration_columns.groupby(["local_segment", "cluster_label"], sort=True).sum().reset_index()
    stay_at_clusters["duration_in_minutes"] = stay_at_clusters["duration_in_seconds"] / 60

    aggregations = {
        "timeattop1location": ("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 1)),
        "timeattop2location": ("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 2)),
        "timeattop3location": ("duration_in_minutes", lambda x: cluster_stay(x, stay_at_clusters, 3)),
        "maxlengthstayatclusters": ("duration_in_minutes", "max"),
        "minlengthstayatclusters": ("duration_in_minutes", "min"),
        "avglengthstayatclusters": ("duration_in_minutes", "mean"),
        "stdlengthstayatclusters": ("duration_in_minutes", "std"),
    }
    stay_at_clusters_features = stay_at_clusters.groupby(["local_segment"]).agg(**aggregations)
    return stay_at_clusters_features.fillna(0)
def location_entropy(location_data):
    """Return the entropy of the time spent in each cluster, per "local_segment".

    The result has three columns: "locationentropy", "num_clusters" and
    "normalizedlocationentropy" (the entropy divided by the number of clusters).
    """
    def plogp(p):
        return p * np.log(p)

    # Time per cluster of a segment and time in all the clusters of that segment
    durations = location_data.groupby(["local_segment", "cluster_label"])[["duration_in_seconds"]].sum().reset_index()
    durations = durations.rename(columns={"duration_in_seconds": "cluster_duration"})
    durations["all_clusters_duration"] = durations.groupby(["local_segment"])["cluster_duration"].transform("sum")

    # Share of the segment's time spent in each cluster
    time_share = durations["cluster_duration"] / durations["all_clusters_duration"]
    durations["plogp"] = time_share.apply(plogp)

    summed_plogp = durations.groupby(["local_segment"])[["plogp"]].sum()
    entropy = -1 * summed_plogp.rename(columns={"plogp": "locationentropy"})
    entropy["num_clusters"] = durations.groupby(["local_segment"])["cluster_label"].nunique()
    entropy["normalizedlocationentropy"] = entropy["locationentropy"] / entropy["num_clusters"]
    return entropy
def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs): def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_segment, *args, **kwargs):
location_data = pd.read_csv(sensor_data_files["sensor_data"]) location_data = pd.read_csv(sensor_data_files["sensor_data"])
requested_features = provider["FEATURES"] requested_features = provider["FEATURES"]
accuracy_limit = provider["ACCURACY_LIMIT"]
dbscan_eps = provider["DBSCAN_EPS"] dbscan_eps = provider["DBSCAN_EPS"]
dbscan_minsamples = provider["DBSCAN_MINSAMPLES"] dbscan_minsamples = provider["DBSCAN_MINSAMPLES"]
threshold_static = provider["THRESHOLD_STATIC"]
maximum_gap_allowed = provider["MAXIMUM_ROW_GAP"]
maximum_row_duration = provider["MAXIMUM_ROW_DURATION"]
cluster_on = provider["CLUSTER_ON"] cluster_on = provider["CLUSTER_ON"]
clustering_algorithm = provider["CLUSTERING_ALGORITHM"] clustering_algorithm = provider["CLUSTERING_ALGORITHM"]
radius_from_home = provider["RADIUS_FOR_HOME"] radius_from_home = provider["RADIUS_FOR_HOME"]
minutes_data_used = provider["MINUTES_DATA_USED"] if provider["MINUTES_DATA_USED"]:
if(minutes_data_used):
requested_features.append("minutesdataused") requested_features.append("minutesdataused")
# name of the features this function can compute # name of the features this function can compute
base_features_names = ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed","circadianmovement","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","minutesdataused","timeathome"] base_features_names = ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed","numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","minutesdataused","timeathome","homelabel"]
# the subset of requested features this function can compute # the subset of requested features this function can compute
features_to_compute = list(set(requested_features) & set(base_features_names)) features_to_compute = list(set(requested_features) & set(base_features_names))
if clustering_algorithm == "DBSCAN": location_data = apply_cluster_strategy(location_data, time_segment, clustering_algorithm, dbscan_eps, dbscan_minsamples, cluster_on, filter_data_by_segment)
hyperparameters = {'eps' : distance_to_degrees(dbscan_eps), 'min_samples': dbscan_minsamples}
elif clustering_algorithm == "OPTICS":
hyperparameters = {'max_eps': distance_to_degrees(dbscan_eps), 'min_samples': 2, 'metric':'euclidean', 'cluster_method' : 'dbscan'}
else:
raise ValueError("config[PHONE_LOCATIONS][DORYAB][CLUSTERING ALGORITHM] only accepts DBSCAN or OPTICS but you provided ",clustering_algorithm)
rows_before_accuracy_filter = len(location_data)
location_data.query("accuracy < @accuracy_limit", inplace=True)
if rows_before_accuracy_filter > 0 and len(location_data) == 0:
warnings.warn("Cannot compute Doryab location features because there are no rows with an accuracy value lower than ACCURACY_LIMIT: {}".format(accuracy_limit))
if location_data.empty: if location_data.empty:
location_features = pd.DataFrame(columns=["local_segment"] + features_to_compute) return pd.DataFrame(columns=["local_segment"] + features_to_compute)
else:
if cluster_on == "PARTICIPANT_DATASET":
location_data = cluster_and_label(location_data,clustering_algorithm,threshold_static,**hyperparameters)
location_data = filter_data_by_segment(location_data, time_segment)
elif cluster_on == "TIME_SEGMENT":
location_data = filter_data_by_segment(location_data, time_segment)
location_data = cluster_and_label(location_data,clustering_algorithm,threshold_static,**hyperparameters)
else:
raise ValueError("config[PHONE_LOCATIONS][DORYAB][CLUSTER_ON] only accepts PARTICIPANT_DATASET or TIME_SEGMENT but you provided ",cluster_on)
if location_data.empty:
location_features = pd.DataFrame(columns=["local_segment"] + features_to_compute)
else:
location_features = pd.DataFrame() location_features = pd.DataFrame()
if "minutesdataused" in features_to_compute: location_features["minutesdataused"] = location_data.drop_duplicates(subset=["local_segment", "local_date", "local_hour", "local_minute"])[["local_segment", "local_minute"]].groupby(["local_segment"])["local_minute"].count()
for localDate in location_data["local_segment"].unique():
location_features.loc[localDate,"minutesdataused"] = getMinutesData(location_data[location_data["local_segment"]==localDate])
location_features.index.name = 'local_segment' # variance features
location_features["locationvariance"] = location_data.groupby(["local_segment"])["double_latitude"].var() + location_data.groupby(["local_segment"])["double_longitude"].var()
location_features["loglocationvariance"] = np.log10(location_features["locationvariance"]).replace(-np.inf, np.nan)
location_data = location_data[(location_data['double_latitude']!=0.0) & (location_data['double_longitude']!=0.0)] # distance and speed features
moving_data = location_data[location_data["is_stationary"] == 0]
location_features = location_features.merge(distance_and_speed_features(moving_data), how="outer", left_index=True, right_index=True)
if location_data.empty: # stationary features
location_features = pd.DataFrame(columns=["local_segment"] + ["location_" + time_segment + "_" + x for x in features_to_compute]) stationary_data = location_data[location_data["is_stationary"] == 1]
location_features = location_features.reset_index(drop=True) stationary_data_without_outliers = stationary_data[stationary_data["cluster_label"] != -1]
return location_features
location_data['timeInSeconds'] = (location_data.timestamp.diff(-1)* -1)/1000 location_features["numberofsignificantplaces"] = stationary_data_without_outliers.groupby(["local_segment"])["cluster_label"].nunique()
if "locationvariance" in features_to_compute: # number of location transitions: ignores transitions from moving to static and vice-versa, but counts transitions from outliers to major location clusters
location_features["locationvariance"] = location_data.groupby(['local_segment'])['double_latitude'].var() + location_data.groupby(['local_segment'])['double_longitude'].var() location_features["numberlocationtransitions"] = stationary_data[["local_segment", "cluster_label"]].groupby(["local_segment"])["cluster_label"].apply(lambda x: np.sum(x != x.shift()) - 1)
location_features["radiusgyration"] = radius_of_gyration(stationary_data_without_outliers)
if "loglocationvariance" in features_to_compute: # stay at topn clusters features
location_features["loglocationvariance"] = (location_data.groupby(['local_segment'])['double_latitude'].var() + location_data.groupby(['local_segment'])['double_longitude'].var()).apply(lambda x: np.log10(x) if x > 0 else None) location_features = location_features.merge(stay_at_topn_clusters(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
# moving to static ratio
static_time = stationary_data.groupby(["local_segment"])["duration_in_seconds"].sum()
total_time = location_data.groupby(["local_segment"])["duration_in_seconds"].sum()
location_features["movingtostaticratio"] = static_time / total_time
preComputedDistanceandSpeed = pd.DataFrame() # outliers time percent
for localDate in location_data['local_segment'].unique(): outliers_time = stationary_data[stationary_data["cluster_label"] == -1].groupby(["local_segment"])["duration_in_seconds"].sum()
speeddf = get_all_travel_distances_meters_speed(location_data[location_data['local_segment']==localDate],threshold_static,maximum_gap_allowed) location_features["outlierstimepercent"] = outliers_time / static_time
preComputedDistanceandSpeed.loc[localDate,"distance"] = speeddf['distances'].sum()
preComputedDistanceandSpeed.loc[localDate,"avgspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].mean()
preComputedDistanceandSpeed.loc[localDate,"varspeed"] = speeddf[speeddf['speedTag'] == 'Moving']['speed'].var()
if "totaldistance" in features_to_compute: # entropy features
for localDate in location_data['local_segment'].unique(): location_features = location_features.merge(location_entropy(stationary_data_without_outliers), how="outer", left_index=True, right_index=True)
location_features.loc[localDate,"totaldistance"] = preComputedDistanceandSpeed.loc[localDate,"distance"]
if "averagespeed" in features_to_compute: # time at home
for localDate in location_data['local_segment'].unique(): location_features["timeathome"] = stationary_data[stationary_data["distance_from_home"] <= radius_from_home].groupby(["local_segment"])["duration_in_seconds"].sum() / 60
location_features.loc[localDate,"averagespeed"] = preComputedDistanceandSpeed.loc[localDate,"avgspeed"]
if "varspeed" in features_to_compute: # home label
for localDate in location_data['local_segment'].unique(): location_features["homelabel"] = stationary_data[["local_segment", "home_label"]].groupby(["local_segment"]).agg(lambda x: pd.Series.mode(x)[0])
location_features.loc[localDate,"varspeed"] = preComputedDistanceandSpeed.loc[localDate,"varspeed"]
if "circadianmovement" in features_to_compute: location_features = location_features[features_to_compute].reset_index()
for localDate in location_data['local_segment'].unique():
location_features.loc[localDate,"circadianmovement"] = circadian_movement(location_data[location_data['local_segment']==localDate])
stationaryLocations = location_data[location_data['stationary_or_not'] == 1]
if "numberofsignificantplaces" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"numberofsignificantplaces"] = number_of_significant_places(stationaryLocations[stationaryLocations['local_segment']==localDate])
if "numberlocationtransitions" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"numberlocationtransitions"] = number_location_transitions(stationaryLocations[stationaryLocations['local_segment']==localDate])
if "radiusgyration" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"radiusgyration"] = radius_of_gyration(stationaryLocations[stationaryLocations['local_segment']==localDate])
preComputedTimeArray = pd.DataFrame()
for localDate in stationaryLocations["local_segment"].unique():
top1,top2,top3,smax,smin,sstd,smean = len_stay_timeattopn(stationaryLocations[stationaryLocations["local_segment"]==localDate],maximum_gap_allowed,maximum_row_duration)
preComputedTimeArray.loc[localDate,"timeattop1"] = top1
preComputedTimeArray.loc[localDate,"timeattop2"] = top2
preComputedTimeArray.loc[localDate,"timeattop3"] = top3
preComputedTimeArray.loc[localDate,"maxlengthstayatclusters"] = smax
preComputedTimeArray.loc[localDate,"minlengthstayatclusters"] = smin
preComputedTimeArray.loc[localDate,"stdlengthstayatclusters"] = sstd
preComputedTimeArray.loc[localDate,"meanlengthstayatclusters"] = smean
if "timeattop1location" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"timeattop1"] = preComputedTimeArray.loc[localDate,"timeattop1"]
if "timeattop2location" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"timeattop2"] = preComputedTimeArray.loc[localDate,"timeattop2"]
if "timeattop3location" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"timeattop3"] = preComputedTimeArray.loc[localDate,"timeattop3"]
if "movingtostaticratio" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"movingtostaticratio"] = (stationaryLocations[stationaryLocations['local_segment']==localDate]['timeInSeconds'].sum()) / (location_data[location_data['local_segment']==localDate]['timeInSeconds'].sum())
if "outlierstimepercent" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"outlierstimepercent"] = outlier_time_percent_new(stationaryLocations[stationaryLocations['local_segment']==localDate])
if "maxlengthstayatclusters" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"maxlengthstayatclusters"] = preComputedTimeArray.loc[localDate,"maxlengthstayatclusters"]
if "minlengthstayatclusters" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"minlengthstayatclusters"] = preComputedTimeArray.loc[localDate,"minlengthstayatclusters"]
if "stdlengthstayatclusters" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"stdlengthstayatclusters"] = preComputedTimeArray.loc[localDate,"stdlengthstayatclusters"]
if "meanlengthstayatclusters" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"meanlengthstayatclusters"] = preComputedTimeArray.loc[localDate,"meanlengthstayatclusters"]
if "locationentropy" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"locationentropy"] = location_entropy(stationaryLocations[stationaryLocations['local_segment']==localDate])
if "normalizedlocationentropy" in features_to_compute:
for localDate in stationaryLocations['local_segment'].unique():
location_features.loc[localDate,"normalizedlocationentropy"] = location_entropy_normalized(stationaryLocations[stationaryLocations['local_segment']==localDate])
if "timeathome" in features_to_compute:
calculationDf = stationaryLocations[['local_segment','distancefromhome','timeInSeconds']].copy()
calculationDf.loc[calculationDf.timeInSeconds >= maximum_gap_allowed,'timeInSeconds'] = maximum_row_duration
location_features["timeathome"] = calculationDf[calculationDf["distancefromhome"] <= radius_from_home].groupby("local_segment")["timeInSeconds"].sum()/60
location_features = location_features.reset_index()
return location_features return location_features
def len_stay_timeattopn(locationData,maximum_gap_allowed,maximum_row_duration):
    """
    Summarise the time spent at each labelled location, in minutes.

    Rows whose ``location_label`` is below 1 are ignored. Any remaining row whose
    ``timeInSeconds`` is at least ``maximum_gap_allowed`` has its duration replaced
    by ``maximum_row_duration`` before the per-label totals are computed.

    :param locationData: DataFrame with ``location_label`` and ``timeInSeconds`` columns
    :param maximum_gap_allowed: durations at or above this value are capped
    :param maximum_row_duration: the duration assigned to capped rows
    :return: tuple ``(top1, top2, top3, max, min, std, mean)`` of per-label totals in
             minutes; a missing top-N entry is ``None``, and all seven entries are
             ``None`` when ``locationData`` is ``None`` or empty
    """
    if locationData is None or len(locationData) == 0:
        return (None, None, None, None, None, None, None)

    stays = locationData.loc[locationData["location_label"] >= 1, ["location_label", "timeInSeconds"]].copy()
    stays.loc[stays["timeInSeconds"] >= maximum_gap_allowed, "timeInSeconds"] = maximum_row_duration

    # Total minutes per label, longest stay first.
    minutes = stays.groupby("location_label")["timeInSeconds"].sum().sort_values(ascending=False) / 60

    # Positional access: after sorting, the index no longer reflects the rank, so
    # label-based lookups such as minutes[0] would not return the longest stay.
    # This also yields the top three when more than three labels are present.
    top = [minutes.iloc[rank] if rank < len(minutes) else None for rank in range(3)]

    return (top[0], top[1], top[2], minutes.max(), minutes.min(), minutes.std(), minutes.mean())
def getMinutesData(locationData):
    """
    Count the distinct (local_hour, local_minute) pairs present in the data.

    :param locationData: DataFrame with ``local_hour`` and ``local_minute`` columns
    :return: number of unique hour/minute combinations
    """
    hour_minute_pairs = locationData[['local_hour', 'local_minute']]
    return len(hour_minute_pairs.drop_duplicates())
def distance_to_degrees(d):
    """
    Approximate conversion of a distance into degrees.

    The value is divided by 1852 and then by 60. This is only an approximation,
    but it speeds up clustering considerably and introduces little error over
    small distances.

    :param d: the distance to convert
    :return: the approximate equivalent in degrees
    """
    scaled = d / 1852
    return scaled / 60
def get_all_travel_distances_meters_speed(locationData,threshold,maximum_gap_allowed):
    """
    Compute the distance to the next row and a speed with a Moving/Static tag per row.

    Only rows whose ``timeInSeconds`` does not exceed ``maximum_gap_allowed`` are used.

    :param locationData: DataFrame with ``double_latitude``, ``double_longitude`` and
                         ``timeInSeconds`` columns
    :param threshold: speeds at or above this value are tagged "Moving", others "Static"
    :param maximum_gap_allowed: rows with a larger ``timeInSeconds`` are discarded
    :return: DataFrame with ``speed``, ``speedTag`` and ``distances`` columns (empty
             when no row passes the gap filter)
    """
    within_gap = locationData['timeInSeconds'] <= maximum_gap_allowed
    # Explicit copy: the columns added below must not be written onto a slice of the
    # caller's frame (this also avoids pandas' SettingWithCopyWarning).
    lat_lon_temp = locationData.loc[within_gap, ['double_latitude', 'double_longitude', 'timeInSeconds']].copy()

    if lat_lon_temp.empty:
        return pd.DataFrame({"speed": [], "speedTag": [], "distances": []})

    # Distance in meters from each row to the following one; the last row has no
    # successor, so its distance is NaN.
    lat_lon_temp['distances'] = haversine(
        lat_lon_temp['double_longitude'],
        lat_lon_temp['double_latitude'],
        lat_lon_temp['double_longitude'].shift(-1),
        lat_lon_temp['double_latitude'].shift(-1),
    )
    lat_lon_temp['speed'] = (lat_lon_temp['distances'] / lat_lon_temp['timeInSeconds'])  # meter/second
    # A zero duration yields an infinite speed; treat it as missing, then scale by 3.6
    # (meters/second -> kilometers/hour).
    lat_lon_temp['speed'] = lat_lon_temp['speed'].replace(np.inf, np.nan) * 3.6
    lat_lon_temp['speedTag'] = np.where(lat_lon_temp['speed'] >= threshold, "Moving", "Static")

    return lat_lon_temp[['speed', 'speedTag', 'distances']]
def vincenty_row(x):
    """
    :param x: A row from a dataframe with '_lat_before', '_lon_before', '_lat_after'
              and '_lon_after' entries
    :return: The distance in meters between the "before" and "after" coordinates as
             reported by ``vincenty``, or 0 when the distance cannot be computed
    """
    try:
        return vincenty((x['_lat_before'], x['_lon_before']),(x['_lat_after'], x['_lon_after'])).meters
    except Exception:
        # Best-effort fallback kept from the original, but narrowed so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return 0
def haversine(lon1,lat1,lon2,lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)

    Each argument may be a scalar or an array-like; inputs are broadcast against
    each other, so scalars and arrays can be mixed.

    :return: the distance in meters (a NumPy scalar or array)
    """
    # convert decimal degrees to radians; converting each argument separately
    # (instead of stacking them in one list) lets scalars and arrays be combined
    lon1, lat1, lon2, lat2 = (
        np.radians(np.asarray(value, dtype=float)) for value in (lon1, lat1, lon2, lat2)
    )

    # haversine formula
    a = np.sin((lat2-lat1)/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2
    # rounding can push `a` marginally outside [0, 1], which would make arcsin return NaN
    a = np.clip(a, 0.0, 1.0)

    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return (r * 2 * np.arcsin(np.sqrt(a)) * 1000)
def circadian_movement_energies(locationData):
    """
    Sum the Lomb-Scargle power of latitude and longitude over periods near 24 hours.

    :param locationData: DataFrame with ``timestamp``, ``double_latitude`` and
                         ``double_longitude`` columns (timestamp is divided by 1000
                         to obtain seconds, so it is presumably in milliseconds)
    :return: tuple ``(energy_latitude, energy_longitude)``
    """
    # Periods from 23.5 h in steps of 0.01 h (upper bound 24.51 h, exclusive),
    # expressed as frequencies in 1/second.
    periods_in_hours = np.arange(23.5, 24.51, 0.01)
    periods_in_seconds = periods_in_hours * 60 * 60
    frequencies = 1 / periods_in_seconds

    sample_times = (locationData["timestamp"].values / 1000.0)  # seconds
    latitudes = locationData["double_latitude"].values
    longitudes = locationData["double_longitude"].values

    def band_energy(signal):
        # Total PSD-normalised power across the frequency band.
        spectrum = LombScargle(sample_times, signal).power(frequency=frequencies, normalization='psd')
        return np.sum(spectrum)

    return (band_energy(latitudes), band_energy(longitudes))
def circadian_movement(locationData):
    """
    Return the base-10 logarithm of the combined latitude and longitude energies
    reported by ``circadian_movement_energies``.

    :param locationData: DataFrame forwarded unchanged to ``circadian_movement_energies``
    :return: ``log10(energy_latitude + energy_longitude)``
    """
    energies = circadian_movement_energies(locationData)
    combined_energy = energies[0] + energies[1]
    return np.log10(combined_energy)
def cluster_and_label(df,clustering_algorithm,threshold_static,**kwargs):
    """
    Cluster the coordinates in df and attach a ranked "location_label" column.

    :param df: a df with "double_latitude", "double_longitude" and "timestamp" columns
               and either a datetime index or a "local_date_time" column (used as
               the index when the index is not a DatetimeIndex)
    :param clustering_algorithm: "DBSCAN" selects sklearn's DBSCAN; any other value
               selects OPTICS
    :param threshold_static: forwarded to mark_moving
    :param kwargs: arguments for the selected sklearn clusterer
    :return: the frame returned by mark_moving with an added "location_label" column
             remapped through rank_count_map (most frequent cluster -> 1, noise -> -1);
             an empty df is returned unchanged
    """
    if df.empty:
        return df

    if isinstance(df.index, pd.DatetimeIndex):
        location_data = df
    else:
        location_data = df.set_index("local_date_time")

    stationary = mark_moving(location_data, threshold_static)

    coordinate_columns = ["double_latitude", "double_longitude"]

    # One row per distinct coordinate pair; column 0 holds the number of occurrences.
    counts_df = stationary[coordinate_columns].groupby(coordinate_columns).size().reset_index()
    counts = counts_df[0]
    lat_lon = counts_df[coordinate_columns].values

    if clustering_algorithm == "DBSCAN":
        # Occurrence counts act as sample weights for the de-duplicated points.
        cluster_results = DBSCAN(**kwargs).fit_predict(lat_lon, sample_weight=counts)
    else:
        cluster_results = OPTICS(**kwargs).fit_predict(lat_lon)

    # Extend the labels back to the un-weighted rows: label each distinct pair,
    # drop the helper count column, then join on the coordinates.
    counts_df["location_label"] = cluster_results
    del counts_df[0]
    merged = pd.merge(stationary, counts_df, on=coordinate_columns)

    # Build the label -> rank mapping from the non-noise labels only.
    raw_labels = merged["location_label"].values
    non_noise_labels = raw_labels[np.where(raw_labels != -1)]
    label_map = rank_count_map(non_noise_labels)

    # Remap the labels onto the stationary frame.
    merged.index = stationary.index
    stationary = stationary.assign(location_label=merged["location_label"].map(label_map).values)
    stationary.loc[:, "location_label"] = merged["location_label"].map(label_map)
    return stationary
def rank_count_map(clusters):
    """ Returns a function which maps a cluster label to its frequency rank,
    such that the most common label in 'clusters' maps to 1.
    Is used in this context to sort the cluster labels so that the cluster with
    rank 1 is the most visited.
    Labels with equal counts are ordered by label value, largest first.
    A value that is not one of the labels is mapped to -1.
    """
    labels, counts = np.unique(clusters, return_counts=True)
    # Sorting (count, label) pairs in reverse puts the most frequent label first.
    by_frequency = sorted(zip(counts, labels), reverse=True)
    label_to_rank = {label: position for position, (_, label) in enumerate(by_frequency, start=1)}
    return lambda x: label_to_rank.get(x, -1)
def mark_moving(df, threshold_static):
    """
    Add a "stationary_or_not" column flagging each row as stationary (1) or moving (0).

    The speed between a row and the next one is the haversine distance (converted to
    kilometers) divided by the timestamp difference scaled by 1000*60*60 (hours,
    assuming "timestamp" is in milliseconds). Rows where that speed is below
    ``threshold_static`` are stationary; rows where it cannot be computed (e.g. the
    last row) compare as False and are therefore marked 0.

    :param df: DataFrame with "double_latitude", "double_longitude" and "timestamp"
               columns; it is sorted by index first when the index is not already
               increasing. NOTE: when no sort is needed the column is added to the
               frame that was passed in.
    :param threshold_static: speed threshold below which a row counts as stationary
    :return: the (possibly re-sorted) frame with the "stationary_or_not" column
    """
    # Index.is_monotonic was removed in pandas 2.0; is_monotonic_increasing is the
    # equivalent spelling that works across versions.
    if not df.index.is_monotonic_increasing:
        df = df.sort_index()

    distance = haversine(df.double_longitude,df.double_latitude,df.double_longitude.shift(-1),df.double_latitude.shift(-1))/ 1000
    time = (df.timestamp.diff(-1) * -1) / (1000*60*60)

    df['stationary_or_not'] = np.where((distance / time) < threshold_static,1,0) # 1 being stationary,0 for moving

    return df
def number_of_significant_places(locationData):
    """
    Count the distinct location labels that are 1 or greater.

    :param locationData: DataFrame with a "location_label" column
    :return: number of unique labels >= 1
    """
    labels = locationData["location_label"]
    return labels[labels >= 1].nunique()
def number_location_transitions(locationData):
    """
    Count how many times "location_label" changes between consecutive rows.

    Every change of label counts, including changes to or from the outlier label.

    :param locationData: DataFrame with a "location_label" column
    :return: number of label changes; 0 when locationData is None or empty
    """
    # Without this guard an empty frame would yield -1 (zero changes minus the
    # first-row correction below).
    if locationData is None or len(locationData) == 0:
        return 0

    labels = locationData["location_label"]
    changed = labels != labels.shift()
    # The first row never equals its (missing) predecessor, so discount it.
    return int(changed.sum()) - 1
def radius_of_gyration(locationData):
    """
    Time-weighted radius of gyration of the labelled clusters.

    The center is the centroid of the cluster centroids, not the home location.
    For each cluster, the squared haversine distance from its centroid to that
    center is weighted by the total "timeInSeconds" recorded for the cluster.

    :param locationData: DataFrame with "location_label", "double_latitude",
                         "double_longitude" and "timeInSeconds" columns
    :return: None for None/empty input, 0 when the non-outlier rows have no
             recorded time, otherwise the square root of the weighted mean
    """
    if locationData is None or len(locationData) == 0:
        return None

    coordinate_columns = ['double_latitude', 'double_longitude']

    # Rows labelled -1 are excluded from the centroids.
    valid_clusters = locationData[locationData["location_label"] != -1]
    clusters_centroid = valid_clusters.groupby('location_label')[coordinate_columns].mean()
    centroid_all_clusters = clusters_centroid.mean()

    weighted_squared_distance = 0
    for label in clusters_centroid.index:
        cluster_center = clusters_centroid.loc[label]
        squared_distance = haversine(
            cluster_center.double_longitude,
            cluster_center.double_latitude,
            centroid_all_clusters.double_longitude,
            centroid_all_clusters.double_latitude,
        ) ** 2
        seconds_in_cluster = locationData[locationData["location_label"] == label]['timeInSeconds'].sum()
        weighted_squared_distance = weighted_squared_distance + (seconds_in_cluster * squared_distance)

    time_all_clusters = valid_clusters['timeInSeconds'].sum()
    if time_all_clusters == 0:
        return 0

    return np.sqrt((1 / time_all_clusters) * weighted_squared_distance)
def outlier_time_percent_new(locationData):
    """
    Fraction of the total "timeInSeconds" that belongs to rows labelled -1.

    :param locationData: DataFrame with "location_label" and "timeInSeconds" columns
    :return: outlier time divided by total time, or None for None/empty input
    """
    if locationData is None or len(locationData) == 0:
        return None

    durations = locationData["timeInSeconds"]
    outlier_seconds = durations[locationData["location_label"] == -1].sum()
    total_seconds = durations.sum()
    return outlier_seconds / total_seconds
def location_entropy(locationData):
    """
    Entropy of the share of time spent at each labelled location.

    Rows with a "location_label" below 1 (outliers / cluster noise) are removed
    first. Each remaining label's share of the total "timeInSeconds" is p, and
    the result is -sum(p * ln(p)).

    :param locationData: DataFrame with "location_label" and "timeInSeconds" columns
    :return: the entropy, or None when the input is None/empty or no row has a
             label of 1 or greater
    """
    if locationData is None or len(locationData) == 0:
        return None

    clusters = locationData[locationData["location_label"] >= 1]  # remove outliers/ cluster noise
    if len(clusters) == 0:
        return None

    # Share of the total time for each location
    seconds_per_label = clusters.groupby(['location_label'])['timeInSeconds'].sum()
    percents = seconds_per_label / clusters['timeInSeconds'].sum()
    weighted_logs = percents.map(lambda share: share * np.log(share))
    return -1 * weighted_logs.sum()
def location_entropy_normalized(locationData):
    """
    Location entropy divided by the natural log of the number of distinct clusters.

    Rows with a "location_label" below 1 (outliers / cluster noise) are removed
    before the entropy and the cluster count are computed.

    :param locationData: DataFrame with "location_label" and "timeInSeconds" columns
    :return: the normalized entropy, or None when the input is None/empty, no
             labelled rows remain, the entropy is unavailable, or the log of the
             cluster count is 0
    """
    if locationData is None or len(locationData) == 0:
        return None

    significant = locationData[locationData["location_label"] >= 1]  # remove outliers/ cluster noise
    entropy = location_entropy(significant)
    num_clusters = len(significant["location_label"].unique())

    if num_clusters == 0 or len(significant) == 0 or entropy is None:
        return None

    max_entropy = np.log(num_clusters)
    if max_entropy == 0:
        return None

    return entropy / max_entropy

View File

@ -0,0 +1,53 @@
timestamp,device_id,double_latitude,double_longitude,double_bearing,double_speed,double_altitude,provider,accuracy
1583596560000,android,-100.0,-100.0,0.0,0.0,100,gps,800
1583596620000,android,-100.0,-100.0,0.0,0.0,100,gps,800
1583596680000,android,-100.000001,-100.000001,0.0,0.0,100,gps,10
1583596740000,android,-100.000001,-100.0,0.0,0.0,100,gps,10
1583596800000,android,-100.000001,-100.0,0.0,0.0,100,gps,800
1583596860000,android,-100.0,-100.0,0.0,0.0,100,gps,10
1583596920000,android,-99.999999,-100.0,0.0,0.0,100,gps,800
1583596980000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
1583597040000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
1583652600000,android,1.0,1.0000120000000001,0.0,0.0,100,gps,10
1583652660000,android,1.0,1.0000120000000001,0.0,0.0,100,gps,10
1583652720000,android,1.000001,1.0,0.0,0.0,100,gps,10
1583652780000,android,1.0,1.0,0.0,0.0,100,gps,10
1583652840000,android,1.0,1.0,0.0,0.0,100,gps,10
1583652900000,android,0.999999,1.0,0.0,0.0,100,gps,10
1583652960000,android,1.0,0.9999879999999999,0.0,0.0,100,gps,10
1583653620000,android,1.0,0.9999899999999999,0.0,0.0,100,gps,800
1583681400000,android,100.0,100.0,0.0,0.0,100,gps,800
1583681460000,android,100.0,100.0,0.0,0.0,100,gps,800
1583681520000,android,100.000001,100.000001,0.0,0.0,100,gps,10
1583681580000,android,100.000001,100.0,0.0,0.0,100,gps,10
1583681640000,android,100.000001,100.0,0.0,0.0,100,gps,800
1583681700000,android,100.0,100.0,0.0,0.0,100,gps,10
1583681760000,android,99.999999,100.0,0.0,0.0,100,gps,800
1583681820000,android,99.999999,99.999999,0.0,0.0,100,gps,10
1583681880000,android,99.999999,99.999999,0.0,0.0,100,gps,10
1604156160000,android,-100.0,-100.0,0.0,0.0,100,gps,800
1604156220000,android,-100.0,-100.0,0.0,0.0,100,gps,800
1604156280000,android,-100.000001,-100.000001,0.0,0.0,100,gps,10
1604156340000,android,-100.000001,-100.0,0.0,0.0,100,gps,10
1604156400000,android,-100.000001,-100.0,0.0,0.0,100,gps,800
1604156460000,android,-100.0,-100.0,0.0,0.0,100,gps,10
1604156520000,android,-99.999999,-100.0,0.0,0.0,100,gps,800
1604156580000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
1604156640000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
1604219400000,android,1.0,1.0000120000000001,0.0,0.0,100,gps,10
1604219460000,android,1.0,1.0000120000000001,0.0,0.0,100,gps,10
1604219520000,android,1.000001,1.0,0.0,0.0,100,gps,10
1604219580000,android,1.0,1.0,0.0,0.0,100,gps,10
1604219640000,android,1.0,1.0,0.0,0.0,100,gps,10
1604219700000,android,0.999999,1.0,0.0,0.0,100,gps,10
1604219760000,android,1.0,0.9999879999999999,0.0,0.0,100,gps,10
1604220420000,android,1.0,0.9999899999999999,0.0,0.0,100,gps,800
1604248200000,android,100.0,100.0,0.0,0.0,100,gps,800
1604248260000,android,100.0,100.0,0.0,0.0,100,gps,800
1604248320000,android,100.000001,100.000001,0.0,0.0,100,gps,10
1604248380000,android,100.000001,100.0,0.0,0.0,100,gps,10
1604248440000,android,100.000001,100.0,0.0,0.0,100,gps,800
1604248500000,android,100.0,100.0,0.0,0.0,100,gps,10
1604248560000,android,99.999999,100.0,0.0,0.0,100,gps,800
1604248620000,android,99.999999,99.999999,0.0,0.0,100,gps,10
1604248680000,android,99.999999,99.999999,0.0,0.0,100,gps,10
1 timestamp device_id double_latitude double_longitude double_bearing double_speed double_altitude provider accuracy
2 1583596560000 android -100.0 -100.0 0.0 0.0 100 gps 800
3 1583596620000 android -100.0 -100.0 0.0 0.0 100 gps 800
4 1583596680000 android -100.000001 -100.000001 0.0 0.0 100 gps 10
5 1583596740000 android -100.000001 -100.0 0.0 0.0 100 gps 10
6 1583596800000 android -100.000001 -100.0 0.0 0.0 100 gps 800
7 1583596860000 android -100.0 -100.0 0.0 0.0 100 gps 10
8 1583596920000 android -99.999999 -100.0 0.0 0.0 100 gps 800
9 1583596980000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
10 1583597040000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
11 1583652600000 android 1.0 1.0000120000000001 0.0 0.0 100 gps 10
12 1583652660000 android 1.0 1.0000120000000001 0.0 0.0 100 gps 10
13 1583652720000 android 1.000001 1.0 0.0 0.0 100 gps 10
14 1583652780000 android 1.0 1.0 0.0 0.0 100 gps 10
15 1583652840000 android 1.0 1.0 0.0 0.0 100 gps 10
16 1583652900000 android 0.999999 1.0 0.0 0.0 100 gps 10
17 1583652960000 android 1.0 0.9999879999999999 0.0 0.0 100 gps 10
18 1583653620000 android 1.0 0.9999899999999999 0.0 0.0 100 gps 800
19 1583681400000 android 100.0 100.0 0.0 0.0 100 gps 800
20 1583681460000 android 100.0 100.0 0.0 0.0 100 gps 800
21 1583681520000 android 100.000001 100.000001 0.0 0.0 100 gps 10
22 1583681580000 android 100.000001 100.0 0.0 0.0 100 gps 10
23 1583681640000 android 100.000001 100.0 0.0 0.0 100 gps 800
24 1583681700000 android 100.0 100.0 0.0 0.0 100 gps 10
25 1583681760000 android 99.999999 100.0 0.0 0.0 100 gps 800
26 1583681820000 android 99.999999 99.999999 0.0 0.0 100 gps 10
27 1583681880000 android 99.999999 99.999999 0.0 0.0 100 gps 10
28 1604156160000 android -100.0 -100.0 0.0 0.0 100 gps 800
29 1604156220000 android -100.0 -100.0 0.0 0.0 100 gps 800
30 1604156280000 android -100.000001 -100.000001 0.0 0.0 100 gps 10
31 1604156340000 android -100.000001 -100.0 0.0 0.0 100 gps 10
32 1604156400000 android -100.000001 -100.0 0.0 0.0 100 gps 800
33 1604156460000 android -100.0 -100.0 0.0 0.0 100 gps 10
34 1604156520000 android -99.999999 -100.0 0.0 0.0 100 gps 800
35 1604156580000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
36 1604156640000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
37 1604219400000 android 1.0 1.0000120000000001 0.0 0.0 100 gps 10
38 1604219460000 android 1.0 1.0000120000000001 0.0 0.0 100 gps 10
39 1604219520000 android 1.000001 1.0 0.0 0.0 100 gps 10
40 1604219580000 android 1.0 1.0 0.0 0.0 100 gps 10
41 1604219640000 android 1.0 1.0 0.0 0.0 100 gps 10
42 1604219700000 android 0.999999 1.0 0.0 0.0 100 gps 10
43 1604219760000 android 1.0 0.9999879999999999 0.0 0.0 100 gps 10
44 1604220420000 android 1.0 0.9999899999999999 0.0 0.0 100 gps 800
45 1604248200000 android 100.0 100.0 0.0 0.0 100 gps 800
46 1604248260000 android 100.0 100.0 0.0 0.0 100 gps 800
47 1604248320000 android 100.000001 100.000001 0.0 0.0 100 gps 10
48 1604248380000 android 100.000001 100.0 0.0 0.0 100 gps 10
49 1604248440000 android 100.000001 100.0 0.0 0.0 100 gps 800
50 1604248500000 android 100.0 100.0 0.0 0.0 100 gps 10
51 1604248560000 android 99.999999 100.0 0.0 0.0 100 gps 800
52 1604248620000 android 99.999999 99.999999 0.0 0.0 100 gps 10
53 1604248680000 android 99.999999 99.999999 0.0 0.0 100 gps 10

View File

@ -0,0 +1,29 @@
test_time,device_id,double_latitude,double_longitude,double_bearing,double_speed,double_altitude,provider,accuracy
Sat 10:56:00.000,android,-100,-100,0.0,0.0,100,gps,800
Sat 10:57:00.000,android,-100,-100,0.0,0.0,100,gps,800
Sat 10:58:00.000,android,-100.000001,-100.000001,0.0,0.0,100,gps,10
Sat 10:59:00.000,android,-100.000001,-100,0.0,0.0,100,gps,10
Sat 11:00:00.000,android,-100.000001,-100,0.0,0.0,100,gps,800
Sat 11:01:00.000,android,-100,-100,0.0,0.0,100,gps,10
Sat 11:02:00.000,android,-99.999999,-100,0.0,0.0,100,gps,800
Sat 11:03:00.000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
Sat 11:04:00.000,android,-99.999999,-99.999999,0.0,0.0,100,gps,10
Sun 03:30:00.000,android,1,1.000012,0.0,0.0,100,gps,10
Sun 03:31:00.000,android,1,1.000012,0.0,0.0,100,gps,10
Sun 03:32:00.000,android,1.000001,1,0.0,0.0,100,gps,10
Sun 03:33:00.000,android,1,1,0.0,0.0,100,gps,10
Sun 03:34:00.000,android,1,1,0.0,0.0,100,gps,10
Sun 03:35:00.000,android,0.999999,1,0.0,0.0,100,gps,10
Sun 03:36:00.000,android,1,0.999988,0.0,0.0,100,gps,10
Sun 03:47:00.000,android,1,0.999990,0.0,0.0,100,gps,800
Sun 11:30:00.000,android,100,100,0.0,0.0,100,gps,800
Sun 11:31:00.000,android,100,100,0.0,0.0,100,gps,800
Sun 11:32:00.000,android,100.000001,100.000001,0.0,0.0,100,gps,10
Sun 11:33:00.000,android,100.000001,100,0.0,0.0,100,gps,10
Sun 11:34:00.000,android,100.000001,100,0.0,0.0,100,gps,800
Sun 11:35:00.000,android,100,100,0.0,0.0,100,gps,10
Sun 11:36:00.000,android,99.999999,100,0.0,0.0,100,gps,800
Sun 11:37:00.000,android,99.999999,99.999999,0.0,0.0,100,gps,10
Sun 11:38:00.000,android,99.999999,99.999999,0.0,0.0,100,gps,10
1 test_time device_id double_latitude double_longitude double_bearing double_speed double_altitude provider accuracy
2 Sat 10:56:00.000 android -100 -100 0.0 0.0 100 gps 800
3 Sat 10:57:00.000 android -100 -100 0.0 0.0 100 gps 800
4 Sat 10:58:00.000 android -100.000001 -100.000001 0.0 0.0 100 gps 10
5 Sat 10:59:00.000 android -100.000001 -100 0.0 0.0 100 gps 10
6 Sat 11:00:00.000 android -100.000001 -100 0.0 0.0 100 gps 800
7 Sat 11:01:00.000 android -100 -100 0.0 0.0 100 gps 10
8 Sat 11:02:00.000 android -99.999999 -100 0.0 0.0 100 gps 800
9 Sat 11:03:00.000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
10 Sat 11:04:00.000 android -99.999999 -99.999999 0.0 0.0 100 gps 10
11 Sun 03:30:00.000 android 1 1.000012 0.0 0.0 100 gps 10
12 Sun 03:31:00.000 android 1 1.000012 0.0 0.0 100 gps 10
13 Sun 03:32:00.000 android 1.000001 1 0.0 0.0 100 gps 10
14 Sun 03:33:00.000 android 1 1 0.0 0.0 100 gps 10
15 Sun 03:34:00.000 android 1 1 0.0 0.0 100 gps 10
16 Sun 03:35:00.000 android 0.999999 1 0.0 0.0 100 gps 10
17 Sun 03:36:00.000 android 1 0.999988 0.0 0.0 100 gps 10
18 Sun 03:47:00.000 android 1 0.999990 0.0 0.0 100 gps 800
19 Sun 11:30:00.000 android 100 100 0.0 0.0 100 gps 800
20 Sun 11:31:00.000 android 100 100 0.0 0.0 100 gps 800
21 Sun 11:32:00.000 android 100.000001 100.000001 0.0 0.0 100 gps 10
22 Sun 11:33:00.000 android 100.000001 100 0.0 0.0 100 gps 10
23 Sun 11:34:00.000 android 100.000001 100 0.0 0.0 100 gps 800
24 Sun 11:35:00.000 android 100 100 0.0 0.0 100 gps 10
25 Sun 11:36:00.000 android 99.999999 100 0.0 0.0 100 gps 800
26 Sun 11:37:00.000 android 99.999999 99.999999 0.0 0.0 100 gps 10
27 Sun 11:38:00.000 android 99.999999 99.999999 0.0 0.0 100 gps 10

View File

@ -0,0 +1,3 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_varspeed","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_timeathome","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avgspeed","phone_locations_doryab_locationvariance"
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",1,3.71826910068082,NA,6,NA,0.346573590279973,0,0.693147180559945,6,6,2,6268829.80206745,6,6,1,0,6,NA,NA,5227.19980200003
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_numberlocationtransitions phone_locations_doryab_loglocationvariance phone_locations_doryab_totaldistance phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_varspeed phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_timeattop2location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeattop1location phone_locations_doryab_timeathome phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_avgspeed phone_locations_doryab_locationvariance
2 beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00 beforeMarchEvent 2020-03-07 16:00:00 2020-03-08 15:00:00 1 3.71826910068082 NA 6 NA 0.346573590279973 0 0.693147180559945 6 6 2 6268829.80206745 6 6 1 0 6 NA NA 5227.19980200003
3 beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00 beforeNovemberEvent 2020-10-31 16:00:00 2020-11-01 13:00:00 1 3.71826910068082 NA 6 NA 0.346573590279973 0 0.693147180559945 6 6 2 6268829.80206745 6 6 1 0 6 NA NA 5227.19980200003

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1,8 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop3location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_timeathome","phone_locations_doryab_locationentropy","phone_locations_doryab_radiusgyration","phone_locations_doryab_totaldistance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_avgspeed","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters"
"thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59","thirtyminutes0017","2020-03-08 08:30:00","2020-03-08 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6
"thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59","thirtyminutes0017","2020-11-01 08:30:00","2020-11-01 08:59:59",1.58333332533827e-12,0,6,0,0,6,-11.8004276472878,1,NA,0,0,NA,NA,6,0,1,0,NA,NA,6
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",4.99999997475243e-13,3,3,0,0,3,-12.301029997857,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",9.99999994950485e-13,3,3,0,0,3,-12.000000002193,1,NA,0,0,NA,NA,0,0,1,0,NA,NA,3
"thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59","thirtyminutes0047","2020-03-07 23:30:00","2020-03-07 23:59:59",3.8799999999484e-11,0,6,0,6,6,-10.4111682744116,1,6,0,0,NA,NA,0,0,1,0,NA,NA,6
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_locationvariance phone_locations_doryab_timeattop3location phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_timeattop1location phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_loglocationvariance phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_timeathome phone_locations_doryab_locationentropy phone_locations_doryab_radiusgyration phone_locations_doryab_totaldistance phone_locations_doryab_outlierstimepercent phone_locations_doryab_timeattop2location phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_movingtostaticratio phone_locations_doryab_numberlocationtransitions phone_locations_doryab_avgspeed phone_locations_doryab_varspeed phone_locations_doryab_minlengthstayatclusters
2 thirtyminutes0017#2020-03-08 08:30:00,2020-03-08 08:59:59 thirtyminutes0017 2020-03-08 08:30:00 2020-03-08 08:59:59 1.58333332533827e-12 0 6 0 0 6 -11.8004276472878 1 NA 0 0 NA NA 6 0 1 0 NA NA 6
3 thirtyminutes0017#2020-11-01 08:30:00,2020-11-01 08:59:59 thirtyminutes0017 2020-11-01 08:30:00 2020-11-01 08:59:59 1.58333332533827e-12 0 6 0 0 6 -11.8004276472878 1 NA 0 0 NA NA 6 0 1 0 NA NA 6
4 thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59 thirtyminutes0021 2020-03-07 10:30:00 2020-03-07 10:59:59 4.99999997475243e-13 3 3 0 0 3 -12.301029997857 1 NA 0 0 NA NA 0 0 1 0 NA NA 3
5 thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59 thirtyminutes0021 2020-10-31 10:30:00 2020-10-31 10:59:59 4.99999997475243e-13 3 3 0 0 3 -12.301029997857 1 NA 0 0 NA NA 0 0 1 0 NA NA 3
6 thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59 thirtyminutes0022 2020-03-07 11:00:00 2020-03-07 11:29:59 9.99999994950485e-13 3 3 0 0 3 -12.000000002193 1 NA 0 0 NA NA 0 0 1 0 NA NA 3
7 thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59 thirtyminutes0022 2020-10-31 11:00:00 2020-10-31 11:29:59 9.99999994950485e-13 3 3 0 0 3 -12.000000002193 1 NA 0 0 NA NA 0 0 1 0 NA NA 3
8 thirtyminutes0047#2020-03-07 23:30:00,2020-03-07 23:59:59 thirtyminutes0047 2020-03-07 23:30:00 2020-03-07 23:59:59 3.8799999999484e-11 0 6 0 6 6 -10.4111682744116 1 6 0 0 NA NA 0 0 1 0 NA NA 6

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1,16 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_timeattop3location","phone_locations_doryab_avgspeed","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_radiusgyration","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_locationvariance"
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",6,0.693147180559945,NA,6,NA,6,NA,1,6416036.08057409,1,0,NA,3.73564149377632,0,6,2,6,6,0.346573590279973,5440.53356226669
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",6,0,NA,0,NA,0,NA,0,0,1,6,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",6,0,NA,0,NA,6,NA,0,0,1,0,NA,-11.8004276472878,0,NA,1,6,6,0,1.58333332533827e-12
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",6,0.693147180559945,NA,6,NA,0,NA,1,6268829.80206745,1,6,NA,3.71826910068082,0,6,2,6,6,0.346573590279973,5227.19980200003
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",6,1.09861228866811,NA,6,NA,6,NA,2,8014514.68387131,1,6,NA,4.09019524373105,0,6,3,6,6,0.366204096222703,12308.2198130989
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_outlierstimepercent phone_locations_doryab_timeattop1location phone_locations_doryab_totaldistance phone_locations_doryab_timeattop3location phone_locations_doryab_avgspeed phone_locations_doryab_numberlocationtransitions phone_locations_doryab_radiusgyration phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_timeathome phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_locationvariance
2 daily#2020-03-07 00:00:00,2020-03-07 23:59:59 daily 2020-03-07 00:00:00 2020-03-07 23:59:59 6 0.693147180559945 NA 6 NA 6 NA 1 6416036.08057409 1 0 NA 3.73564149377632 0 6 2 6 6 0.346573590279973 5440.53356226669
3 daily#2020-03-08 00:00:00,2020-03-08 23:59:59 daily 2020-03-08 00:00:00 2020-03-08 23:59:59 6 0 NA 0 NA 0 NA 0 0 1 6 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
4 daily#2020-10-31 00:00:00,2020-10-31 23:59:59 daily 2020-10-31 00:00:00 2020-10-31 23:59:59 6 0 NA 0 NA 6 NA 0 0 1 0 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
5 daily#2020-11-01 00:00:00,2020-11-01 23:59:59 daily 2020-11-01 00:00:00 2020-11-01 23:59:59 6 0.693147180559945 NA 6 NA 0 NA 1 6268829.80206745 1 6 NA 3.71826910068082 0 6 2 6 6 0.346573590279973 5227.19980200003
6 morning#2020-03-07 06:00:00,2020-03-07 11:59:59 morning 2020-03-07 06:00:00 2020-03-07 11:59:59 6 0 NA 0 NA 6 NA 0 0 1 0 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
7 morning#2020-03-08 06:00:00,2020-03-08 11:59:59 morning 2020-03-08 06:00:00 2020-03-08 11:59:59 6 0 NA 0 NA 0 NA 0 0 1 6 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
8 morning#2020-10-31 06:00:00,2020-10-31 11:59:59 morning 2020-10-31 06:00:00 2020-10-31 11:59:59 6 0 NA 0 NA 6 NA 0 0 1 0 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
9 morning#2020-11-01 06:00:00,2020-11-01 11:59:59 morning 2020-11-01 06:00:00 2020-11-01 11:59:59 6 0 NA 0 NA 0 NA 0 0 1 6 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
10 threeday#2020-03-07 00:00:00,2020-03-09 23:59:59 threeday 2020-03-07 00:00:00 2020-03-09 23:59:59 6 1.09861228866811 NA 6 NA 6 NA 2 8014514.68387131 1 6 NA 4.09019524373105 0 6 3 6 6 0.366204096222703 12308.2198130989
11 threeday#2020-03-08 00:00:00,2020-03-10 23:59:59 threeday 2020-03-08 00:00:00 2020-03-10 23:59:59 6 0 NA 0 NA 0 NA 0 0 1 6 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
12 threeday#2020-10-29 00:00:00,2020-10-31 23:59:59 threeday 2020-10-29 00:00:00 2020-10-31 23:59:59 6 0 NA 0 NA 6 NA 0 0 1 0 NA -11.8004276472878 0 NA 1 6 6 0 1.58333332533827e-12
13 threeday#2020-10-30 00:00:00,2020-11-01 23:59:59 threeday 2020-10-30 00:00:00 2020-11-01 23:59:59 6 1.09861228866811 NA 6 NA 6 NA 2 8014514.68387131 1 6 NA 4.09019524373105 0 6 3 6 6 0.366204096222703 12308.2198130989
14 threeday#2020-10-31 00:00:00,2020-11-02 23:59:59 threeday 2020-10-31 00:00:00 2020-11-02 23:59:59 6 1.09861228866811 NA 6 NA 6 NA 2 8014514.68387131 1 6 NA 4.09019524373105 0 6 3 6 6 0.366204096222703 12308.2198130989
15 threeday#2020-11-01 00:00:00,2020-11-03 23:59:59 threeday 2020-11-01 00:00:00 2020-11-03 23:59:59 6 0.693147180559945 NA 6 NA 0 NA 1 6268829.80206745 1 6 NA 3.71826910068082 0 6 2 6 6 0.346573590279973 5227.19980200003
16 weekend#2020-10-30 00:00:00,2020-11-01 23:59:59 weekend 2020-10-30 00:00:00 2020-11-01 23:59:59 6 1.09861228866811 NA 6 NA 6 NA 2 8014514.68387131 1 6 NA 4.09019524373105 0 6 3 6 6 0.366204096222703 12308.2198130989

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1,3 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_loglocationvariance","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_locationvariance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop2location","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_timeattop3location","phone_locations_doryab_locationentropy","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeattop1location","phone_locations_doryab_totaldistance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome"
"beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00","beforeMarchEvent","2020-03-07 16:00:00","2020-03-08 15:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6
"beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00","beforeNovemberEvent","2020-10-31 16:00:00","2020-11-01 13:00:00",3.71826910068082,2,1,6,NA,NA,6,5227.19980200003,NA,6,0.346573590279973,1,0,0.693147180559945,6,6268829.80206745,6,NA,0,6
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_loglocationvariance phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_movingtostaticratio phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_outlierstimepercent phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_locationvariance phone_locations_doryab_varspeed phone_locations_doryab_timeattop2location phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_numberlocationtransitions phone_locations_doryab_timeattop3location phone_locations_doryab_locationentropy phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_radiusgyration phone_locations_doryab_timeattop1location phone_locations_doryab_totaldistance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_timeathome
2 beforeMarchEvent#2020-03-07 16:00:00,2020-03-08 15:00:00 beforeMarchEvent 2020-03-07 16:00:00 2020-03-08 15:00:00 3.71826910068082 2 1 6 NA NA 6 5227.19980200003 NA 6 0.346573590279973 1 0 0.693147180559945 6 6268829.80206745 6 NA 0 6
3 beforeNovemberEvent#2020-10-31 16:00:00,2020-11-01 13:00:00 beforeNovemberEvent 2020-10-31 16:00:00 2020-11-01 13:00:00 3.71826910068082 2 1 6 NA NA 6 5227.19980200003 NA 6 0.346573590279973 1 0 0.693147180559945 6 6268829.80206745 6 NA 0 6

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1,9 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_timeattop1location","phone_locations_doryab_varspeed","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_locationvariance","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_loglocationvariance","phone_locations_doryab_totaldistance","phone_locations_doryab_radiusgyration","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_timeattop3location"
"thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59","thirtyminutes0007","2020-03-08 03:30:00","2020-03-08 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0
"thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59","thirtyminutes0007","2020-11-01 03:30:00","2020-11-01 03:59:59",6,NA,6,0,3.8799999999484e-11,NA,6,6,1,1,-10.4111682744116,NA,0,0,0,0,0,6,NA,0
"thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59","thirtyminutes0021","2020-03-07 10:30:00","2020-03-07 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3
"thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59","thirtyminutes0021","2020-10-31 10:30:00","2020-10-31 10:59:59",0,NA,3,0,4.99999997475243e-13,NA,3,NA,1,1,-12.301029997857,NA,0,0,0,0,0,3,NA,3
"thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59","thirtyminutes0022","2020-03-07 11:00:00","2020-03-07 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3
"thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59","thirtyminutes0022","2020-10-31 11:00:00","2020-10-31 11:29:59",0,NA,3,0,9.99999994950485e-13,NA,3,NA,1,1,-12.000000002193,NA,0,0,0,0,0,3,NA,3
"thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59","thirtyminutes0023","2020-03-08 11:30:00","2020-03-08 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0
"thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59","thirtyminutes0023","2020-11-01 11:30:00","2020-11-01 11:59:59",0,NA,6,0,1.58333332533827e-12,NA,6,NA,1,1,-11.8004276472878,NA,0,0,0,0,6,6,NA,0
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_timeattop1location phone_locations_doryab_varspeed phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_numberlocationtransitions phone_locations_doryab_locationvariance phone_locations_doryab_outlierstimepercent phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_timeathome phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_movingtostaticratio phone_locations_doryab_loglocationvariance phone_locations_doryab_totaldistance phone_locations_doryab_radiusgyration phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_timeattop2location phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_timeattop3location
2 thirtyminutes0007#2020-03-08 03:30:00,2020-03-08 03:59:59 thirtyminutes0007 2020-03-08 03:30:00 2020-03-08 03:59:59 6 NA 6 0 3.8799999999484e-11 NA 6 6 1 1 -10.4111682744116 NA 0 0 0 0 0 6 NA 0
3 thirtyminutes0007#2020-11-01 03:30:00,2020-11-01 03:59:59 thirtyminutes0007 2020-11-01 03:30:00 2020-11-01 03:59:59 6 NA 6 0 3.8799999999484e-11 NA 6 6 1 1 -10.4111682744116 NA 0 0 0 0 0 6 NA 0
4 thirtyminutes0021#2020-03-07 10:30:00,2020-03-07 10:59:59 thirtyminutes0021 2020-03-07 10:30:00 2020-03-07 10:59:59 0 NA 3 0 4.99999997475243e-13 NA 3 NA 1 1 -12.301029997857 NA 0 0 0 0 0 3 NA 3
5 thirtyminutes0021#2020-10-31 10:30:00,2020-10-31 10:59:59 thirtyminutes0021 2020-10-31 10:30:00 2020-10-31 10:59:59 0 NA 3 0 4.99999997475243e-13 NA 3 NA 1 1 -12.301029997857 NA 0 0 0 0 0 3 NA 3
6 thirtyminutes0022#2020-03-07 11:00:00,2020-03-07 11:29:59 thirtyminutes0022 2020-03-07 11:00:00 2020-03-07 11:29:59 0 NA 3 0 9.99999994950485e-13 NA 3 NA 1 1 -12.000000002193 NA 0 0 0 0 0 3 NA 3
7 thirtyminutes0022#2020-10-31 11:00:00,2020-10-31 11:29:59 thirtyminutes0022 2020-10-31 11:00:00 2020-10-31 11:29:59 0 NA 3 0 9.99999994950485e-13 NA 3 NA 1 1 -12.000000002193 NA 0 0 0 0 0 3 NA 3
8 thirtyminutes0023#2020-03-08 11:30:00,2020-03-08 11:59:59 thirtyminutes0023 2020-03-08 11:30:00 2020-03-08 11:59:59 0 NA 6 0 1.58333332533827e-12 NA 6 NA 1 1 -11.8004276472878 NA 0 0 0 0 6 6 NA 0
9 thirtyminutes0023#2020-11-01 11:30:00,2020-11-01 11:59:59 thirtyminutes0023 2020-11-01 11:30:00 2020-11-01 11:59:59 0 NA 6 0 1.58333332533827e-12 NA 6 NA 1 1 -11.8004276472878 NA 0 0 0 0 6 6 NA 0

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1,16 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_timeathome","phone_locations_doryab_radiusgyration","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_locationvariance","phone_locations_doryab_normalizedlocationentropy","phone_locations_doryab_timeattop2location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_totaldistance","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop3location","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_locationentropy","phone_locations_doryab_loglocationvariance","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_timeattop1location","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_avgspeed"
"daily#2020-03-07 00:00:00,2020-03-07 23:59:59","daily","2020-03-07 00:00:00","2020-03-07 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
"daily#2020-03-08 00:00:00,2020-03-08 23:59:59","daily","2020-03-08 00:00:00","2020-03-08 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
"daily#2020-10-31 00:00:00,2020-10-31 23:59:59","daily","2020-10-31 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
"daily#2020-11-01 00:00:00,2020-11-01 23:59:59","daily","2020-11-01 00:00:00","2020-11-01 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
"morning#2020-03-07 06:00:00,2020-03-07 11:59:59","morning","2020-03-07 06:00:00","2020-03-07 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
"morning#2020-03-08 06:00:00,2020-03-08 11:59:59","morning","2020-03-08 06:00:00","2020-03-08 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA
"morning#2020-10-31 06:00:00,2020-10-31 11:59:59","morning","2020-10-31 06:00:00","2020-10-31 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
"morning#2020-11-01 06:00:00,2020-11-01 11:59:59","morning","2020-11-01 06:00:00","2020-11-01 11:59:59",0,NA,0,NA,1.58333332533827e-12,0,6,0,NA,NA,0,6,1,1,0,-11.8004276472878,6,0,6,NA
"threeday#2020-03-07 00:00:00,2020-03-09 23:59:59","threeday","2020-03-07 00:00:00","2020-03-09 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
"threeday#2020-03-08 00:00:00,2020-03-10 23:59:59","threeday","2020-03-08 00:00:00","2020-03-10 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
"threeday#2020-10-29 00:00:00,2020-10-31 23:59:59","threeday","2020-10-29 00:00:00","2020-10-31 23:59:59",0,NA,0,NA,1.58333332533827e-12,0,0,0,NA,NA,6,6,1,1,0,-11.8004276472878,6,0,6,NA
"threeday#2020-10-30 00:00:00,2020-11-01 23:59:59","threeday","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
"threeday#2020-10-31 00:00:00,2020-11-02 23:59:59","threeday","2020-10-31 00:00:00","2020-11-02 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
"threeday#2020-11-01 00:00:00,2020-11-03 23:59:59","threeday","2020-11-01 00:00:00","2020-11-03 23:59:59",0,6,6268829.80206745,NA,5227.19980200003,0.346573590279973,6,1,NA,NA,0,6,1,2,0.693147180559945,3.71826910068082,6,6,6,NA
"weekend#2020-10-30 00:00:00,2020-11-01 23:59:59","weekend","2020-10-30 00:00:00","2020-11-01 23:59:59",0,6,8014514.68387131,NA,12308.2198130989,0.366204096222703,6,2,NA,NA,6,6,1,3,1.09861228866811,4.09019524373105,6,6,6,NA
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_timeathome phone_locations_doryab_radiusgyration phone_locations_doryab_outlierstimepercent phone_locations_doryab_locationvariance phone_locations_doryab_normalizedlocationentropy phone_locations_doryab_timeattop2location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_totaldistance phone_locations_doryab_varspeed phone_locations_doryab_timeattop3location phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_locationentropy phone_locations_doryab_loglocationvariance phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_timeattop1location phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_avgspeed
2 daily#2020-03-07 00:00:00,2020-03-07 23:59:59 daily 2020-03-07 00:00:00 2020-03-07 23:59:59 0 NA 0 NA 1.58333332533827e-12 0 0 0 NA NA 6 6 1 1 0 -11.8004276472878 6 0 6 NA
3 daily#2020-03-08 00:00:00,2020-03-08 23:59:59 daily 2020-03-08 00:00:00 2020-03-08 23:59:59 0 6 6268829.80206745 NA 5227.19980200003 0.346573590279973 6 1 NA NA 0 6 1 2 0.693147180559945 3.71826910068082 6 6 6 NA
4 daily#2020-10-31 00:00:00,2020-10-31 23:59:59 daily 2020-10-31 00:00:00 2020-10-31 23:59:59 0 NA 0 NA 1.58333332533827e-12 0 0 0 NA NA 6 6 1 1 0 -11.8004276472878 6 0 6 NA
5 daily#2020-11-01 00:00:00,2020-11-01 23:59:59 daily 2020-11-01 00:00:00 2020-11-01 23:59:59 0 6 6268829.80206745 NA 5227.19980200003 0.346573590279973 6 1 NA NA 0 6 1 2 0.693147180559945 3.71826910068082 6 6 6 NA
6 morning#2020-03-07 06:00:00,2020-03-07 11:59:59 morning 2020-03-07 06:00:00 2020-03-07 11:59:59 0 NA 0 NA 1.58333332533827e-12 0 0 0 NA NA 6 6 1 1 0 -11.8004276472878 6 0 6 NA
7 morning#2020-03-08 06:00:00,2020-03-08 11:59:59 morning 2020-03-08 06:00:00 2020-03-08 11:59:59 0 NA 0 NA 1.58333332533827e-12 0 6 0 NA NA 0 6 1 1 0 -11.8004276472878 6 0 6 NA
8 morning#2020-10-31 06:00:00,2020-10-31 11:59:59 morning 2020-10-31 06:00:00 2020-10-31 11:59:59 0 NA 0 NA 1.58333332533827e-12 0 0 0 NA NA 6 6 1 1 0 -11.8004276472878 6 0 6 NA
9 morning#2020-11-01 06:00:00,2020-11-01 11:59:59 morning 2020-11-01 06:00:00 2020-11-01 11:59:59 0 NA 0 NA 1.58333332533827e-12 0 6 0 NA NA 0 6 1 1 0 -11.8004276472878 6 0 6 NA
10 threeday#2020-03-07 00:00:00,2020-03-09 23:59:59 threeday 2020-03-07 00:00:00 2020-03-09 23:59:59 0 6 8014514.68387131 NA 12308.2198130989 0.366204096222703 6 2 NA NA 6 6 1 3 1.09861228866811 4.09019524373105 6 6 6 NA
11 threeday#2020-03-08 00:00:00,2020-03-10 23:59:59 threeday 2020-03-08 00:00:00 2020-03-10 23:59:59 0 6 6268829.80206745 NA 5227.19980200003 0.346573590279973 6 1 NA NA 0 6 1 2 0.693147180559945 3.71826910068082 6 6 6 NA
12 threeday#2020-10-29 00:00:00,2020-10-31 23:59:59 threeday 2020-10-29 00:00:00 2020-10-31 23:59:59 0 NA 0 NA 1.58333332533827e-12 0 0 0 NA NA 6 6 1 1 0 -11.8004276472878 6 0 6 NA
13 threeday#2020-10-30 00:00:00,2020-11-01 23:59:59 threeday 2020-10-30 00:00:00 2020-11-01 23:59:59 0 6 8014514.68387131 NA 12308.2198130989 0.366204096222703 6 2 NA NA 6 6 1 3 1.09861228866811 4.09019524373105 6 6 6 NA
14 threeday#2020-10-31 00:00:00,2020-11-02 23:59:59 threeday 2020-10-31 00:00:00 2020-11-02 23:59:59 0 6 8014514.68387131 NA 12308.2198130989 0.366204096222703 6 2 NA NA 6 6 1 3 1.09861228866811 4.09019524373105 6 6 6 NA
15 threeday#2020-11-01 00:00:00,2020-11-03 23:59:59 threeday 2020-11-01 00:00:00 2020-11-03 23:59:59 0 6 6268829.80206745 NA 5227.19980200003 0.346573590279973 6 1 NA NA 0 6 1 2 0.693147180559945 3.71826910068082 6 6 6 NA
16 weekend#2020-10-30 00:00:00,2020-11-01 23:59:59 weekend 2020-10-30 00:00:00 2020-11-01 23:59:59 0 6 8014514.68387131 NA 12308.2198130989 0.366204096222703 6 2 NA NA 6 6 1 3 1.09861228866811 4.09019524373105 6 6 6 NA

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -0,0 +1 @@
"local_segment","local_segment_label","local_segment_start_datetime","local_segment_end_datetime","phone_locations_doryab_avglengthstayatclusters","phone_locations_doryab_outlierstimepercent","phone_locations_doryab_maxlengthstayatclusters","phone_locations_doryab_locationentropy","phone_locations_doryab_locationvariance","phone_locations_doryab_timeattop2location","phone_locations_doryab_varspeed","phone_locations_doryab_timeattop1location","phone_locations_doryab_numberlocationtransitions","phone_locations_doryab_minlengthstayatclusters","phone_locations_doryab_movingtostaticratio","phone_locations_doryab_timeattop3location","phone_locations_doryab_numberofsignificantplaces","phone_locations_doryab_radiusgyration","phone_locations_doryab_timeathome","phone_locations_doryab_totaldistance","phone_locations_doryab_loglocationvariance","phone_locations_doryab_stdlengthstayatclusters","phone_locations_doryab_avgspeed","phone_locations_doryab_normalizedlocationentropy"
1 local_segment local_segment_label local_segment_start_datetime local_segment_end_datetime phone_locations_doryab_avglengthstayatclusters phone_locations_doryab_outlierstimepercent phone_locations_doryab_maxlengthstayatclusters phone_locations_doryab_locationentropy phone_locations_doryab_locationvariance phone_locations_doryab_timeattop2location phone_locations_doryab_varspeed phone_locations_doryab_timeattop1location phone_locations_doryab_numberlocationtransitions phone_locations_doryab_minlengthstayatclusters phone_locations_doryab_movingtostaticratio phone_locations_doryab_timeattop3location phone_locations_doryab_numberofsignificantplaces phone_locations_doryab_radiusgyration phone_locations_doryab_timeathome phone_locations_doryab_totaldistance phone_locations_doryab_loglocationvariance phone_locations_doryab_stdlengthstayatclusters phone_locations_doryab_avgspeed phone_locations_doryab_normalizedlocationentropy

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -203,7 +203,7 @@ PHONE_CONVERSATION:
# See https://www.rapids.science/latest/features/phone-data-yield/ # See https://www.rapids.science/latest/features/phone-data-yield/
PHONE_DATA_YIELD: PHONE_DATA_YIELD:
SENSORS: [] SENSORS: [PHONE_LOCATIONS]
PROVIDERS: PROVIDERS:
RAPIDS: RAPIDS:
COMPUTE: False COMPUTE: False
@ -231,28 +231,24 @@ PHONE_LIGHT:
# See https://www.rapids.science/latest/features/phone-locations/ # See https://www.rapids.science/latest/features/phone-locations/
PHONE_LOCATIONS: PHONE_LOCATIONS:
CONTAINER: locations CONTAINER: phone_locations_raw.csv
LOCATIONS_TO_USE: ALL_RESAMPLED # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED LOCATIONS_TO_USE: GPS # ALL, GPS, ALL_RESAMPLED, OR FUSED_RESAMPLED
FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold FUSED_RESAMPLED_CONSECUTIVE_THRESHOLD: 30 # minutes, only replicate location samples to the next sensed bin if the phone did not stop collecting data for more than this threshold
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: 720 # minutes, only replicate location samples to consecutive sensed bins if they were logged within this threshold after a valid location row
HOME_INFERENCE:
DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5
THRESHOLD_STATIC : 1 # km/h
CLUSTERING_ALGORITHM: DBSCAN #DBSCAN,OPTICS
PROVIDERS: PROVIDERS:
DORYAB: DORYAB:
COMPUTE: False COMPUTE: True
FEATURES: ["locationvariance","loglocationvariance","totaldistance","averagespeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","meanlengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"] FEATURES: ["locationvariance","loglocationvariance","totaldistance","avgspeed","varspeed", "numberofsignificantplaces","numberlocationtransitions","radiusgyration","timeattop1location","timeattop2location","timeattop3location","movingtostaticratio","outlierstimepercent","maxlengthstayatclusters","minlengthstayatclusters","avglengthstayatclusters","stdlengthstayatclusters","locationentropy","normalizedlocationentropy","timeathome"]
ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius ACCURACY_LIMIT: 100 # meters, drops location coordinates with an accuracy higher than this. This number means there's a 68% probability the true location is within this radius
DBSCAN_EPS: 10 # meters DBSCAN_EPS: 10 # meters
DBSCAN_MINSAMPLES: 5 DBSCAN_MINSAMPLES: 3
THRESHOLD_STATIC : 1 # km/h THRESHOLD_STATIC : 1 # km/h
MAXIMUM_ROW_GAP: 300 MAXIMUM_ROW_GAP: 300 # seconds
MAXIMUM_ROW_DURATION: 60
MINUTES_DATA_USED: False MINUTES_DATA_USED: False
CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET,TIME_SEGMENT CLUSTER_ON: PARTICIPANT_DATASET # PARTICIPANT_DATASET, TIME_SEGMENT, TIME_SEGMENT_INSTANCE
INFER_HOME_LOCATION_STRATEGY: DORYAB_STRATEGY # DORYAB_STRATEGY, SUN_LI_VEGA_STRATEGY
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3
CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS
RADIUS_FOR_HOME: 100 RADIUS_FOR_HOME: 100
SRC_SCRIPT: src/features/phone_locations/doryab/main.py SRC_SCRIPT: src/features/phone_locations/doryab/main.py

View File

@ -612,22 +612,6 @@ properties:
FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION: FUSED_RESAMPLED_TIME_SINCE_VALID_LOCATION:
type: integer type: integer
exclusiveMinimum: 0 exclusiveMinimum: 0
HOME_INFERENCE:
type: object
required: [DBSCAN_EPS, DBSCAN_MINSAMPLES, THRESHOLD_STATIC, CLUSTERING_ALGORITHM]
properties:
DBSCAN_EPS:
type: integer
exclusiveMinimum: 0
DBSCAN_MINSAMPLES:
type: integer
exclusiveMinimum: 0
THRESHOLD_STATIC:
type: integer
exclusiveMinimum: 0
CLUSTERING_ALGORITHM:
type: string
enum: ["DBSCAN", "OPTICS"]
PROVIDERS: PROVIDERS:
type: ["null", object] type: ["null", object]
properties: properties:
@ -640,7 +624,7 @@ properties:
uniqueItems: True uniqueItems: True
items: items:
type: string type: string
enum: [locationvariance,loglocationvariance,totaldistance,averagespeed,varspeed,circadianmovement,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,meanlengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy,timeathome] enum: [locationvariance,loglocationvariance,totaldistance,avgspeed,varspeed,numberofsignificantplaces,numberlocationtransitions,radiusgyration,timeattop1location,timeattop2location,timeattop3location,movingtostaticratio,outlierstimepercent,maxlengthstayatclusters,minlengthstayatclusters,avglengthstayatclusters,stdlengthstayatclusters,locationentropy,normalizedlocationentropy,timeathome,homelabel]
ACCURACY_LIMIT: ACCURACY_LIMIT:
type: integer type: integer
exclusiveMinimum: 0 exclusiveMinimum: 0
@ -656,17 +640,23 @@ properties:
MAXIMUM_ROW_GAP: MAXIMUM_ROW_GAP:
type: integer type: integer
exclusiveMinimum: 0 exclusiveMinimum: 0
MAXIMUM_ROW_DURATION:
type: integer
exclusiveMinimum: 0
MINUTES_DATA_USED: MINUTES_DATA_USED:
type: boolean type: boolean
CLUSTER_ON: CLUSTER_ON:
type: string type: string
enum: ["PARTICIPANT_DATASET", "TIME_SEGMENT"] enum: ["PARTICIPANT_DATASET", "TIME_SEGMENT", "TIME_SEGMENT_INSTANCE"]
INFER_HOME_LOCATION_STRATEGY:
type: string
enum: ["DORYAB_STRATEGY", "SUN_LI_VEGA_STRATEGY"]
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES:
type: integer
minimum: 0
CLUSTERING_ALGORITHM: CLUSTERING_ALGORITHM:
type: string type: string
enum: ["DBSCAN", "OPTICS"] enum: ["DBSCAN", "OPTICS"]
RADIUS_FOR_HOME:
type: integer
exclusiveMinimum: 0
BARNETT: BARNETT:
allOf: allOf: