diff --git a/features/proximity.py b/features/proximity.py
index 1884f29..36c9569 100644
--- a/features/proximity.py
+++ b/features/proximity.py
@@ -5,7 +5,13 @@ import pandas as pd
 from config.models import Participant, Proximity
 from setup import db_engine, session
 
-FEATURES_PROXIMITY = ["freq_prox_near", "prop_prox_near"]
+# Per-feature value used to fill NaNs; the keys double as the feature names.
+FILL_NA_PROXIMITY = {
+    "freq_prox_near": 0,
+    "prop_prox_near": 1 / 2,  # Proportion of the form a / (a + b).
+}
+
+FEATURES_PROXIMITY = list(FILL_NA_PROXIMITY)
 
 
 def get_proximity_data(usernames: Collection) -> pd.DataFrame:
diff --git a/machine_learning/pipeline.py b/machine_learning/pipeline.py
index 56c1d0c..9b3cb72 100644
--- a/machine_learning/pipeline.py
+++ b/machine_learning/pipeline.py
@@ -72,6 +72,13 @@ class SensorFeatures:
             self.df_features_all = safe_outer_merge_on_index(
                 self.df_features_all, self.df_proximity_counts
             )
+            # A dict `value` fills only the columns it names.
+            # NOTE(review): `downcast` is deprecated in recent pandas - confirm.
+            self.df_features_all.fillna(
+                value=proximity.FILL_NA_PROXIMITY,
+                inplace=True,
+                downcast="infer",
+            )
             print("Calculated proximity features.")
 
         if "communication" in self.data_types: