Add a simple function for recoding proximity.

communication
junos 2021-08-11 15:04:27 +02:00
parent a2180aee54
commit af65d0864f
2 changed files with 46 additions and 8 deletions

View File

@ -42,7 +42,9 @@ df_app_categories.head()
# Count how often each play_store_genre value occurs in df_app_categories.
df_app_categories["play_store_genre"].value_counts()
# %%
# Keep only the rows whose play_store_genre is the literal string "not_found".
df_category_not_found = df_app_categories[df_app_categories["play_store_genre"] == "not_found"]
df_category_not_found = df_app_categories[
df_app_categories["play_store_genre"] == "not_found"
]
# %%
# Tally the play_store_response values among the "not_found" rows.
df_category_not_found["play_store_response"].value_counts()
@ -51,13 +53,24 @@ df_category_not_found["play_store_response"].value_counts()
# Count how often each package_name occurs among the "not_found" rows.
df_category_not_found["package_name"].value_counts()
# %%
# Name fragments searched for in package_name below.
# Judging by the variable names these are device manufacturers, custom ROMs
# and other system packages -- TODO confirm the intended grouping.
manufacturers = ["samsung","oneplus","huawei","xiaomi","lge","motorola","miui","lenovo","oppo","mediatek"]
manufacturers = [
"samsung",
"oneplus",
"huawei",
"xiaomi",
"lge",
"motorola",
"miui",
"lenovo",
"oppo",
"mediatek",
]
custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
other = ["android", "wssyncmldm"]
# Boolean mask: True where package_name contains any fragment, ignoring case.
# NOTE(review): the fragments are joined with "|" and str.contains interprets
# the pattern as a regular expression by default, so the "." in
# "foundation.e" matches any character rather than a literal dot.
rows_os_manufacturer = df_category_not_found["package_name"].str.contains("|".join(manufacturers + custom_rom + other), case=False)
rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
"|".join(manufacturers + custom_rom + other), case=False
)
# %%
# Temporarily lift the row/column display limits and show the "not_found"
# rows that did NOT match any of the fragments above.
with pd.option_context(
"display.max_rows", None, "display.max_columns", None
):
with pd.option_context("display.max_rows", None, "display.max_columns", None):
display(df_category_not_found.loc[~rows_os_manufacturer])

View File

@ -28,3 +28,28 @@ def get_proximity_data(usernames: Collection) -> pd.DataFrame:
with db_engine.connect() as connection:
df_proximity = pd.read_sql(query_proximity.statement, connection)
return df_proximity
def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
    """
    Recode proximity from a double to a boolean value.

    Different proximity sensors report different values,
    but in our data only several distinct values have ever been found.
    These are therefore converted into "near" and "far" binary values.
    See expl_proximity.ipynb for additional info.

    Parameters
    ----------
    df_proximity: pd.DataFrame
        A dataframe of proximity data.
        It must contain the column double_proximity.

    Returns
    -------
    df_proximity: pd.DataFrame
        A new dataframe (the input is left unmodified) holding the original
        columns plus an additional column bool_prox_near,
        indicating whether "near" proximity was reported.
        It is True where double_proximity equals 0 and False otherwise;
        False values correspond to "far" reported by this sensor.
        Missing readings compare unequal to 0 and are therefore also False.
    """
    # Only a reading of exactly 0 is treated as "near"; assign() returns a
    # copy, so the caller's dataframe is not changed in place.
    df_proximity = df_proximity.assign(
        bool_prox_near=lambda x: x.double_proximity == 0
    )
    return df_proximity