Bug fixed: sort bt_address alphabetically before picking the most frequent bt_address

pull/144/head
Weiyu 2021-06-22 15:27:03 -04:00
parent 5924f251d9
commit 3e4d167adc
1 changed file with 3 additions and 3 deletions

View File

@@ -95,7 +95,7 @@ def ownership_based_on_clustering(bt_frequency):
return bt_frequency[["bt_address", "own_device"]] return bt_frequency[["bt_address", "own_device"]]
def mostLeastScannedDevices(devices): def mostLeastScannedDevices(devices):
device_counts = devices["bt_address"].value_counts() device_counts = devices["bt_address"].value_counts().sort_index(ascending=False).sort_values(ascending=False)
return ("","") if (len(device_counts) == 0) else (device_counts.idxmax(), device_counts.idxmin()) return ("","") if (len(device_counts) == 0) else (device_counts.idxmax(), device_counts.idxmin())
def validate_requested_features(provider): def validate_requested_features(provider):
@@ -120,8 +120,8 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se
feature_prefix = {"DEVICES":"", "SCANS_MOST_FREQUENT_DEVICE":"countscansmostfrequentdevice", "SCANS_LEAST_FREQUENT_DEVICE":"countscansleastfrequentdevice"} feature_prefix = {"DEVICES":"", "SCANS_MOST_FREQUENT_DEVICE":"countscansmostfrequentdevice", "SCANS_LEAST_FREQUENT_DEVICE":"countscansleastfrequentdevice"}
validate_requested_features(provider) validate_requested_features(provider)
device_ownership = ownership_based_on_clustering(deviceFrequency(bt_data)).set_index("bt_address") device_ownership = ownership_based_on_clustering(deviceFrequency(bt_data))
bt_data = bt_data.set_index("bt_address").join(device_ownership, how="left").reset_index() bt_data = bt_data.merge(device_ownership, how="left", on="bt_address")
bt_data["own_device"].fillna(0, inplace=True) bt_data["own_device"].fillna(0, inplace=True)
dataset_most_common_device, dataset_least_common_device = mostLeastScannedDevices(bt_data) dataset_most_common_device, dataset_least_common_device = mostLeastScannedDevices(bt_data)
segment_bt_data = filter_data_by_segment(bt_data, time_segment) segment_bt_data = filter_data_by_segment(bt_data, time_segment)