Add a simple function for recoding proximity.
parent
a2180aee54
commit
af65d0864f
|
@ -33,7 +33,7 @@ from setup import db_engine, session
|
|||
# %%
|
||||
query_app_categories = session.query(AppCategories)
|
||||
with db_engine.connect() as connection:
|
||||
df_app_categories = pd.read_sql(query_app_categories.statement, connection)
|
||||
df_app_categories = pd.read_sql(query_app_categories.statement, connection)
|
||||
|
||||
# %%
|
||||
df_app_categories.head()
|
||||
|
@ -42,7 +42,9 @@ df_app_categories.head()
|
|||
df_app_categories["play_store_genre"].value_counts()
|
||||
|
||||
# %%
|
||||
df_category_not_found = df_app_categories[df_app_categories["play_store_genre"] == "not_found"]
|
||||
df_category_not_found = df_app_categories[
|
||||
df_app_categories["play_store_genre"] == "not_found"
|
||||
]
|
||||
|
||||
# %%
|
||||
df_category_not_found["play_store_response"].value_counts()
|
||||
|
@ -51,13 +53,24 @@ df_category_not_found["play_store_response"].value_counts()
|
|||
df_category_not_found["package_name"].value_counts()
|
||||
|
||||
# %%
|
||||
manufacturers = ["samsung","oneplus","huawei","xiaomi","lge","motorola","miui","lenovo","oppo","mediatek"]
|
||||
custom_rom = ["coloros","lineageos","myos","cyanogenmod", "foundation.e"]
|
||||
manufacturers = [
|
||||
"samsung",
|
||||
"oneplus",
|
||||
"huawei",
|
||||
"xiaomi",
|
||||
"lge",
|
||||
"motorola",
|
||||
"miui",
|
||||
"lenovo",
|
||||
"oppo",
|
||||
"mediatek",
|
||||
]
|
||||
custom_rom = ["coloros", "lineageos", "myos", "cyanogenmod", "foundation.e"]
|
||||
other = ["android", "wssyncmldm"]
|
||||
rows_os_manufacturer = df_category_not_found["package_name"].str.contains("|".join(manufacturers + custom_rom + other), case=False)
|
||||
rows_os_manufacturer = df_category_not_found["package_name"].str.contains(
|
||||
"|".join(manufacturers + custom_rom + other), case=False
|
||||
)
|
||||
|
||||
# %%
|
||||
with pd.option_context(
|
||||
"display.max_rows", None, "display.max_columns", None
|
||||
):
|
||||
with pd.option_context("display.max_rows", None, "display.max_columns", None):
|
||||
display(df_category_not_found.loc[~rows_os_manufacturer])
|
||||
|
|
|
@ -28,3 +28,28 @@ def get_proximity_data(usernames: Collection) -> pd.DataFrame:
|
|||
with db_engine.connect() as connection:
|
||||
df_proximity = pd.read_sql(query_proximity.statement, connection)
|
||||
return df_proximity
|
||||
|
||||
|
||||
def recode_proximity(df_proximity: pd.DataFrame) -> pd.DataFrame:
    """
    Derive a binary near/far indicator from raw proximity readings.

    Proximity sensors differ in the numeric values they report, but only
    a handful of distinct values have been observed in our data, so the
    readings are collapsed into "near" and "far".
    See expl_proximity.ipynb for additional info.

    Parameters
    ----------
    df_proximity: pd.DataFrame
        A dataframe of proximity data. It must expose a double_proximity
        column holding the raw sensor readings.

    Returns
    -------
    df_proximity: pd.DataFrame
        A new dataframe with all the original columns plus bool_prox_near.
        The new column is True where double_proximity equals 0 ("near")
        and False for every other reading ("far").
    """
    # A reading of exactly 0 is what is treated as "near"; anything else
    # (including a missing reading, which never compares equal to 0)
    # ends up as False.
    is_near = df_proximity.double_proximity == 0
    # assign() leaves the caller's dataframe untouched and returns a new one.
    return df_proximity.assign(bool_prox_near=is_near)
|
||||
|
|
Loading…
Reference in New Issue